diff --git a/pkg/ccl/logictestccl/testdata/logic_test/multi_region_stats b/pkg/ccl/logictestccl/testdata/logic_test/multi_region_stats new file mode 100644 index 000000000000..be4c4e4db4e6 --- /dev/null +++ b/pkg/ccl/logictestccl/testdata/logic_test/multi_region_stats @@ -0,0 +1,127 @@ +# LogicTest: multiregion-9node-3region-3azs + +query TTTTT colnames,rowsort +SHOW REGIONS +---- +region zones database_names primary_region_of secondary_region_of +ap-southeast-2 {ap-az1,ap-az2,ap-az3} {} {} {} +ca-central-1 {ca-az1,ca-az2,ca-az3} {} {} {} +us-east-1 {us-az1,us-az2,us-az3} {} {} {} + +query TT colnames,rowsort +SHOW REGIONS FROM CLUSTER +---- +region zones +ap-southeast-2 {ap-az1,ap-az2,ap-az3} +ca-central-1 {ca-az1,ca-az2,ca-az3} +us-east-1 {us-az1,us-az2,us-az3} + +# Regression test for #124181: check that we re-load table statistics after +# running ALTER DATABASE ADD REGION. + +statement ok +CREATE DATABASE db124181 PRIMARY REGION "ap-southeast-2" REGIONS "us-east-1" SURVIVE ZONE FAILURE + +statement ok +USE db124181 + +query TTTT +SHOW ENUMS +---- +public crdb_internal_region {ap-southeast-2,us-east-1} root + +statement ok +CREATE TABLE t124181 ( + region crdb_internal_region NOT NULL, + id UUID NOT NULL DEFAULT gen_random_uuid(), + a INT NOT NULL, + PRIMARY KEY (id), + UNIQUE INDEX (a) +) LOCALITY REGIONAL BY ROW AS region + +statement ok +INSERT INTO t124181 (region, a) VALUES ('ap-southeast-2', 0), ('us-east-1', 1) + +statement ok +ANALYZE t124181 + +let $hist_id_1 +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t124181] WHERE column_names = ARRAY['region'] + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_id_1 +---- +upper_bound range_rows distinct_range_rows equal_rows +'ap-southeast-2' 0 0 1 +'us-east-1' 0 0 1 + +query T +SELECT jsonb_pretty(stat->'histo_buckets') +FROM ( + SELECT jsonb_array_elements(statistics) AS stat + FROM [SHOW STATISTICS USING JSON FOR TABLE t124181] +) +WHERE stat->>'columns' = '["region"]' +---- +[ + { + "distinct_range": 0, + "num_eq": 1, + "num_range": 0, + "upper_bound": "ap-southeast-2" + }, + { + "distinct_range": 0, + "num_eq": 1, + "num_range": 0, + "upper_bound": "us-east-1" + } +] + +# Implicitly add a value to the crdb_internal_region enum. +statement ok +ALTER DATABASE db124181 ADD REGION "ca-central-1" + +query TTTT +SHOW ENUMS +---- +public crdb_internal_region {ap-southeast-2,ca-central-1,us-east-1} root + +# Make sure we can still SHOW STATISTICS and SHOW HISTOGRAM. +let $hist_id_2 +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t124181] WHERE column_names = ARRAY['region'] + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_id_2 +---- +upper_bound range_rows distinct_range_rows equal_rows +'ap-southeast-2' 0 0 1 +'us-east-1' 0 0 1 + +# Make sure we can still SHOW STATISTICS USING JSON. +query T +SELECT jsonb_pretty(stat->'histo_buckets') +FROM ( + SELECT jsonb_array_elements(statistics) AS stat + FROM [SHOW STATISTICS USING JSON FOR TABLE t124181] +) +WHERE stat->>'columns' = '["region"]' +---- +[ + { + "distinct_range": 0, + "num_eq": 1, + "num_range": 0, + "upper_bound": "ap-southeast-2" + }, + { + "distinct_range": 0, + "num_eq": 1, + "num_range": 0, + "upper_bound": "us-east-1" + } +] + +# Make sure we can still use the histogram in statistics_builder. +statement ok +INSERT INTO t124181 (region, a) VALUES ('ca-central-1', 2) diff --git a/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go b/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go index db7f710703f3..1173bf3b3f67 100644 --- a/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go +++ b/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go @@ -170,6 +170,13 @@ func TestCCLLogic_multi_region_show( runCCLLogicTest(t, "multi_region_show") } +func TestCCLLogic_multi_region_stats( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "multi_region_stats") +} + func TestCCLLogic_multi_region_zone_config_extensions( t *testing.T, ) { diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 7f4766ceb275..18eea2c48e8b 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -824,7 +824,7 @@ func (r *Refresher) maybeRefreshStats( rowsAffected int64, asOf time.Duration, ) { - tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */) + tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */, nil /* udtCols */) if err != nil { log.Errorf(ctx, "failed to get table statistics: %v", err) return diff --git a/pkg/sql/stats/delete_stats_test.go b/pkg/sql/stats/delete_stats_test.go index 01e7c45cd0f5..1d290be019e6 100644 --- a/pkg/sql/stats/delete_stats_test.go +++ b/pkg/sql/stats/delete_stats_test.go @@ -262,7 +262,9 @@ func TestDeleteOldStatsForColumns(t *testing.T) { } return testutils.SucceedsSoonError(func() error { - tableStats, err := cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */) + tableStats, err := cache.getTableStatsFromCache( + ctx, tableID, nil /* forecast */, nil, /* udtCols */ + ) if err != nil { return err } @@ -270,7 +272,9 @@ func TestDeleteOldStatsForColumns(t *testing.T) { for i := range testData { stat := &testData[i] if stat.TableID != tableID { - stats, err := cache.getTableStatsFromCache(ctx, stat.TableID, nil /* forecast */) + stats, err := cache.getTableStatsFromCache( + ctx, stat.TableID, nil /* forecast */, nil, /* udtCols */ + ) if err != nil { return err } @@ -554,7 +558,9 @@ func TestDeleteOldStatsForOtherColumns(t *testing.T) { } return testutils.SucceedsSoonError(func() error { - tableStats, err := cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */) + tableStats, err := cache.getTableStatsFromCache( + ctx, tableID, nil /* forecast */, nil, /* udtCols */ + ) if err != nil { return err } @@ -562,7 +568,9 @@ func TestDeleteOldStatsForOtherColumns(t *testing.T) { for i := range testData { stat := &testData[i] if stat.TableID != tableID { - stats, err := cache.getTableStatsFromCache(ctx, stat.TableID, nil /* forecast */) + stats, err := cache.getTableStatsFromCache( + ctx, stat.TableID, nil /* forecast */, nil, /* udtCols */ + ) if err != nil { return err } diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index 680b68f5e1a8..084a63d90b90 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -107,6 +107,11 @@ type cacheEntry struct { // forecast is true if stats could contain forecasts. forecast bool + // userDefinedTypes holds the hydrated user-defined types used in + // histograms. A change to one of these types requires evicting the cacheEntry + // so that we can re-hydrate them. + userDefinedTypes map[descpb.ColumnID]*types.T + stats []*TableStatistic // err is populated if the internal query to retrieve stats hit an error. @@ -231,7 +236,9 @@ func (sc *TableStatisticsCache) GetTableStats( } }() forecast := forecastAllowed(table, sc.settings) - return sc.getTableStatsFromCache(ctx, table.GetID(), &forecast) + return sc.getTableStatsFromCache( + ctx, table.GetID(), &forecast, table.UserDefinedTypeColumns(), + ) } func statsDisallowedSystemTable(tableID descpb.ID) bool { @@ -312,15 +319,14 @@ func forecastAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Set // getTableStatsFromCache is like GetTableStats but assumes that the table ID // is safe to fetch statistics for: non-system, non-virtual, non-view, etc. func (sc *TableStatisticsCache) getTableStatsFromCache( - ctx context.Context, tableID descpb.ID, forecast *bool, + ctx context.Context, tableID descpb.ID, forecast *bool, udtCols []catalog.Column, ) ([]*TableStatistic, error) { sc.mu.Lock() defer sc.mu.Unlock() if found, e := sc.lookupStatsLocked(ctx, tableID, false /* stealthy */); found { - if forecast != nil && e.forecast != *forecast { - // Forecasting was recently enabled or disabled on this table. Evict the - // cache entry and build it again. + if e.isStale(forecast, udtCols) { + // Evict the cache entry and build it again. sc.mu.cache.Del(tableID) } else { return e.stats, e.err @@ -330,6 +336,30 @@ func (sc *TableStatisticsCache) getTableStatsFromCache( return sc.addCacheEntryLocked(ctx, tableID, forecast != nil && *forecast) } +// isStale checks whether we need to evict and re-load the cache entry. +func (e *cacheEntry) isStale(forecast *bool, udtCols []catalog.Column) bool { + // Check whether forecast settings have changed. + if forecast != nil && e.forecast != *forecast { + return true + } + // Check whether user-defined types have changed (this is similar to + // UserDefinedTypeColsHaveSameVersion). + for _, col := range udtCols { + colType := col.GetType() + if histType, ok := e.userDefinedTypes[col.GetID()]; ok { + if histType.Oid() != colType.Oid() { + // This should never be true, but if it is, we'll catch it in + // optTableStat.init and ignore the statistic. For now just skip it. + continue + } + if histType.TypeMeta.Version != colType.TypeMeta.Version { + return true + } + } + } + return false +} + // lookupStatsLocked retrieves any existing stats for the given table. // // If another goroutine is in the process of retrieving the same stats, this @@ -390,17 +420,18 @@ func (sc *TableStatisticsCache) addCacheEntryLocked( sc.mu.cache.Add(tableID, e) sc.mu.numInternalQueries++ + var udts map[descpb.ColumnID]*types.T func() { sc.mu.Unlock() defer sc.mu.Lock() log.VEventf(ctx, 1, "reading statistics for table %d", tableID) - stats, err = sc.getTableStatsFromDB(ctx, tableID, forecast) + stats, udts, err = sc.getTableStatsFromDB(ctx, tableID, forecast) log.VEventf(ctx, 1, "finished reading statistics for table %d", tableID) }() e.mustWait = false - e.forecast, e.stats, e.err = forecast, stats, err + e.forecast, e.userDefinedTypes, e.stats, e.err = forecast, udts, stats, err // Wake up any other callers that are waiting on these stats. e.waitCond.Broadcast() @@ -451,6 +482,7 @@ func (sc *TableStatisticsCache) refreshCacheEntry( forecast := e.forecast var stats []*TableStatistic + var udts map[descpb.ColumnID]*types.T var err error for { func() { @@ -460,7 +492,7 @@ func (sc *TableStatisticsCache) refreshCacheEntry( log.VEventf(ctx, 1, "refreshing statistics for table %d", tableID) // TODO(radu): pass the timestamp and use AS OF SYSTEM TIME. - stats, err = sc.getTableStatsFromDB(ctx, tableID, forecast) + stats, udts, err = sc.getTableStatsFromDB(ctx, tableID, forecast) log.VEventf(ctx, 1, "done refreshing statistics for table %d", tableID) }() if e.lastRefreshTimestamp.Equal(ts) { @@ -470,7 +502,7 @@ func (sc *TableStatisticsCache) refreshCacheEntry( ts = e.lastRefreshTimestamp } - e.stats, e.err = stats, err + e.userDefinedTypes, e.stats, e.err = udts, stats, err e.refreshing = false if err != nil { @@ -628,14 +660,15 @@ func NewTableStatisticProto( // need to run a query to get user defined type metadata. func (sc *TableStatisticsCache) parseStats( ctx context.Context, datums tree.Datums, partialStatisticsColumnsVerActive bool, -) (*TableStatistic, error) { +) (*TableStatistic, *types.T, error) { var tsp *TableStatisticProto var err error tsp, err = NewTableStatisticProto(datums, partialStatisticsColumnsVerActive) if err != nil { - return nil, err + return nil, nil, err } res := &TableStatistic{TableStatisticProto: *tsp} + var udt *types.T if res.HistogramData != nil { // hydrate the type in case any user defined types are present. // There are cases where typ is nil, so don't do anything if so. @@ -656,18 +689,18 @@ func (sc *TableStatisticsCache) parseStats( ) error { resolver := descs.NewDistSQLTypeResolver(txn.Descriptors(), txn.KV()) var err error - res.HistogramData.ColumnType, err = resolver.ResolveTypeByOID(ctx, typ.Oid()) + udt, err = resolver.ResolveTypeByOID(ctx, typ.Oid()) + res.HistogramData.ColumnType = udt return err }); err != nil { - return nil, err + return nil, nil, err } } if err := DecodeHistogramBuckets(res); err != nil { - return nil, err + return nil, nil, err } } - - return res, nil + return res, udt, nil } // DecodeHistogramBuckets decodes encoded HistogramData in tabStat and writes @@ -776,7 +809,7 @@ func (tsp *TableStatisticProto) IsAuto() bool { // type that doesn't exist) and returns the rest (with no error). func (sc *TableStatisticsCache) getTableStatsFromDB( ctx context.Context, tableID descpb.ID, forecast bool, -) ([]*TableStatistic, error) { +) ([]*TableStatistic, map[descpb.ColumnID]*types.T, error) { partialStatisticsColumnsVerActive := sc.settings.Version.IsActive(ctx, clusterversion.V23_1AddPartialStatisticsColumns) var partialPredicateCol string var fullStatisticIDCol string @@ -812,21 +845,37 @@ ORDER BY "createdAt" DESC, "columnIDs" DESC, "statisticID" DESC ctx, "get-table-statistics", nil /* txn */, sessiondata.NodeUserSessionDataOverride, getTableStatisticsStmt, tableID, ) if err != nil { - return nil, err + return nil, nil, err } var statsList []*TableStatistic + var udts map[descpb.ColumnID]*types.T var ok bool for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { - stats, err := sc.parseStats(ctx, it.Cur(), partialStatisticsColumnsVerActive) + stats, udt, err := sc.parseStats(ctx, it.Cur(), partialStatisticsColumnsVerActive) if err != nil { log.Warningf(ctx, "could not decode statistic for table %d: %v", tableID, err) continue } statsList = append(statsList, stats) + // Keep track of user-defined types used in histograms. + if udt != nil { + // TODO(49698): If we ever support multi-column histograms we'll need to + // build this mapping in a different way. + if len(stats.ColumnIDs) == 1 { + colID := stats.ColumnIDs[0] + if udts == nil { + udts = make(map[descpb.ColumnID]*types.T) + } + // Keep the first type we see for the column. + if _, ok := udts[colID]; !ok { + udts[colID] = udt + } + } + } } if err != nil { - return nil, err + return nil, nil, err } // TODO(faizaanmadhani): Wrap merging behind a boolean so @@ -844,5 +893,5 @@ ORDER BY "createdAt" DESC, "columnIDs" DESC, "statisticID" DESC }) } - return statsList, nil + return statsList, udts, nil } diff --git a/pkg/sql/stats/stats_cache_test.go b/pkg/sql/stats/stats_cache_test.go index 4b867de446ab..73d8cdf687d6 100644 --- a/pkg/sql/stats/stats_cache_test.go +++ b/pkg/sql/stats/stats_cache_test.go @@ -114,7 +114,7 @@ func checkStatsForTable( // Perform the lookup and refresh, and confirm the // returned stats match the expected values. - statsList, err := sc.getTableStatsFromCache(ctx, tableID, nil /* forecast */) + statsList, err := sc.getTableStatsFromCache(ctx, tableID, nil /* forecast */, nil /* udtCols */) if err != nil { t.Fatalf("error retrieving stats: %s", err) } @@ -405,7 +405,7 @@ func TestCacheWait(t *testing.T) { for n := 0; n < 10; n++ { wg.Add(1) go func() { - stats, err := sc.getTableStatsFromCache(ctx, id, nil /* forecast */) + stats, err := sc.getTableStatsFromCache(ctx, id, nil /* forecast */, nil /* udtCols */) if err != nil { t.Error(err) } else if !checkStats(stats, expectedStats[id]) {