Skip to content

Commit

Permalink
backupccl: do not backup or restore stat forecasts
Browse files Browse the repository at this point in the history
Statistics forecasts are not persisted and are reconstructed on demand
from the persisted statistics. As such, they don't need to be included
in a backup or restored.

This has the additional effect of solving a bug in which we would fail
to write statistics into our metadata SST if it included more than 1
forecast since the in-memory forecasts have a 0 statistic_id.

Fixes cockroachdb#86806

Epic: none

Release note: None
  • Loading branch information
stevendanna committed Jan 11, 2023
1 parent 8d1865f commit 9059e80
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 59 deletions.
73 changes: 46 additions & 27 deletions pkg/ccl/backupccl/backup_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,39 +349,14 @@ func backup(
return roachpb.RowCount{}, err
}

var tableStatistics []*stats.TableStatisticProto
for i := range backupManifest.Descriptors {
if tbl, _, _, _, _ := descpb.GetDescriptors(&backupManifest.Descriptors[i]); tbl != nil {
tableDesc := tabledesc.NewBuilder(tbl).BuildImmutableTable()
// Collect all the table stats for this table.
tableStatisticsAcc, err := statsCache.GetTableStats(ctx, tableDesc)
if err != nil {
// Successfully backed up data is more valuable than table stats that can
// be recomputed after restore, and so if we fail to collect the stats of a
// table we do not want to mark the job as failed.
// The lack of stats on restore could lead to suboptimal performance when
// reading/writing to this table until the stats have been recomputed.
log.Warningf(ctx, "failed to collect stats for table: %s, "+
"table ID: %d during a backup: %s", tableDesc.GetName(), tableDesc.GetID(),
err.Error())
continue
}
for _, stat := range tableStatisticsAcc {
tableStatistics = append(tableStatistics, &stat.TableStatisticProto)
}
}
}
statsTable := backuppb.StatsTable{
Statistics: tableStatistics,
}

statsTable := getTableStatsForBackup(ctx, statsCache, backupManifest.Descriptors)
if err := backupinfo.WriteTableStatistics(ctx, defaultStore, encryption, &kmsEnv, &statsTable); err != nil {
return roachpb.RowCount{}, err
}

if backupinfo.WriteMetadataSST.Get(&settings.SV) {
if err := backupinfo.WriteBackupMetadataSST(ctx, defaultStore, encryption, &kmsEnv, backupManifest,
tableStatistics); err != nil {
statsTable.Statistics); err != nil {
err = errors.Wrap(err, "writing forward-compat metadata sst")
if !build.IsRelease() {
return roachpb.RowCount{}, err
Expand Down Expand Up @@ -410,6 +385,50 @@ func releaseProtectedTimestamp(
return err
}

// getTableStatsForBackup collects all stats for tables found in descs.
//
// We do not fail if we can't retrieve statistiscs. Successfully
// backed up data is more valuable than table stats that can be
// recomputed after restore. The lack of stats on restore could lead
// to suboptimal performance when reading/writing to this table until
// the stats have been recomputed.
func getTableStatsForBackup(
ctx context.Context, statsCache *stats.TableStatisticsCache, descs []descpb.Descriptor,
) backuppb.StatsTable {
var tableStatistics []*stats.TableStatisticProto
for i := range descs {
if tbl, _, _, _, _ := descpb.GetDescriptors(&descs[i]); tbl != nil {
tableDesc := tabledesc.NewBuilder(tbl).BuildImmutableTable()
tableStatisticsAcc, err := statsCache.GetTableStats(ctx, tableDesc)
if err != nil {
log.Warningf(ctx, "failed to collect stats for table: %s, "+
"table ID: %d during a backup: %s", tableDesc.GetName(), tableDesc.GetID(),
err.Error())
continue
}

for _, stat := range tableStatisticsAcc {
if statShouldBeIncludedInBackupRestore(&stat.TableStatisticProto) {
tableStatistics = append(tableStatistics, &stat.TableStatisticProto)
}
}
}
}
return backuppb.StatsTable{
Statistics: tableStatistics,
}
}

func statShouldBeIncludedInBackupRestore(stat *stats.TableStatisticProto) bool {
// Forecasts and merged stats are computed from the persisted
// stats on demand and do not need to be backed up or
// restored.
if stat.Name == jobspb.ForecastStatsName || stat.Name == jobspb.MergedStatsName {
return false
}
return true
}

type backupResumer struct {
job *jobs.Job
backupStats roachpb.RowCount
Expand Down
6 changes: 3 additions & 3 deletions pkg/ccl/backupccl/backupinfo/backup_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,12 +500,12 @@ func writeStatsToMetadata(
return stats[i].TableID < stats[j].TableID || (stats[i].TableID == stats[j].TableID && stats[i].StatisticID < stats[j].StatisticID)
})

for _, i := range stats {
b, err := protoutil.Marshal(i)
for _, s := range stats {
b, err := protoutil.Marshal(s)
if err != nil {
return err
}
if err := sst.PutUnversioned(encodeStatSSTKey(i.TableID, i.StatisticID), b); err != nil {
if err := sst.PutUnversioned(encodeStatSSTKey(s.TableID, s.StatisticID), b); err != nil {
return err
}
}
Expand Down
8 changes: 1 addition & 7 deletions pkg/ccl/backupccl/full_cluster_backup_restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -609,12 +609,6 @@ func TestClusterRestoreFailCleanup(t *testing.T) {
_, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, InitManualReplication)
defer cleanupFn()

// This test flakes due to
// https://github.com/cockroachdb/cockroach/issues/86806. Instead of skipping
// the test all together, setting the cluster setting to false which triggers
// the failure.
sqlDB.Exec(t, "SET CLUSTER SETTING kv.bulkio.write_metadata_sst.enabled=false")

// Setup the system systemTablesToVerify to ensure that they are copied to the new cluster.
// Populate system.users.
for i := 0; i < 1000; i++ {
Expand Down Expand Up @@ -1078,7 +1072,7 @@ func TestRestoreWithRecreatedDefaultDB(t *testing.T) {

sqlDB.Exec(t, `
DROP DATABASE defaultdb;
CREATE DATABASE defaultdb;
CREATE DATABASE defaultdb;
`)
sqlDB.Exec(t, `BACKUP TO $1`, localFoo)

Expand Down
31 changes: 18 additions & 13 deletions pkg/ccl/backupccl/restore_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,13 +524,16 @@ func (r *restoreResumer) ForceRealSpan() bool {
return true
}

// remapRelevantStatistics changes the table ID references in the stats
// from those they had in the backed up database to what they should be
// in the restored database.
// It also selects only the statistics which belong to one of the tables
// being restored. If the descriptorRewrites can re-write the table ID, then that
// table is being restored.
func remapRelevantStatistics(
// remapAndFilterRelevantStatistics changes the table ID references in
// the stats from those they had in the backed up database to what
// they should be in the restored database.
//
// It also selects only the statistics which belong to one of the
// tables being restored. If the descriptorRewrites can re-write the
// table ID, then that table is being restored.
//
// Any statistics forecasts are ignored.
func remapAndFilterRelevantStatistics(
ctx context.Context,
tableStatistics []*stats.TableStatisticProto,
descriptorRewrites jobspb.DescRewriteMap,
Expand All @@ -540,11 +543,13 @@ func remapRelevantStatistics(

tableHasStatsInBackup := make(map[descpb.ID]struct{})
for _, stat := range tableStatistics {
tableHasStatsInBackup[stat.TableID] = struct{}{}
if tableRewrite, ok := descriptorRewrites[stat.TableID]; ok {
// Statistics imported only when table re-write is present.
stat.TableID = tableRewrite.ID
relevantTableStatistics = append(relevantTableStatistics, stat)
if statShouldBeIncludedInBackupRestore(stat) {
tableHasStatsInBackup[stat.TableID] = struct{}{}
if tableRewrite, ok := descriptorRewrites[stat.TableID]; ok {
// Statistics imported only when table re-write is present.
stat.TableID = tableRewrite.ID
relevantTableStatistics = append(relevantTableStatistics, stat)
}
}
}

Expand Down Expand Up @@ -1555,7 +1560,7 @@ func (r *restoreResumer) doResume(ctx context.Context, execCtx interface{}) erro
backupStats, err := backupinfo.GetStatisticsFromBackup(ctx, defaultStore, details.Encryption,
&kmsEnv, latestBackupManifest)
if err == nil {
remappedStats = remapRelevantStatistics(ctx, backupStats, details.DescriptorRewrites,
remappedStats = remapAndFilterRelevantStatistics(ctx, backupStats, details.DescriptorRewrites,
details.TableDescs)
} else {
// We don't want to fail the restore if we are unable to resolve statistics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----

exec-sql
SET CLUSTER SETTING kv.bulkio.write_metadata_sst.enabled = false;
----


exec-sql
SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = false;
----
Expand Down Expand Up @@ -247,10 +242,6 @@ exec-sql
SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = true;
----

exec-sql
SET CLUSTER SETTING kv.bulkio.write_metadata_sst.enabled = false;
----

# Pause the import job, in order to back up the importing data.
import expect-pausepoint tag=b
IMPORT INTO foo2 (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
Expand Down
42 changes: 42 additions & 0 deletions pkg/ccl/backupccl/testdata/backup-restore/metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
new-cluster name=s
----

# Make sure that the presence of forecasted stats doesn't break
# metadata SST writing. This is a regression test for #86806.
#
# We create 3 statistics manually which is the minimum needed to
# create a forecast. Automatic collection is disabled to avoid
# flakiness.
exec-sql
SET CLUSTER SETTING kv.bulkio.write_metadata_sst.enabled = true;
----

exec-sql
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false;
----

exec-sql
CREATE DATABASE db1;
USE db1;
CREATE TABLE tab (a INT PRIMARY KEY, b INT);
----

exec-sql
CREATE STATISTICS __auto__ ON a FROM tab;
CREATE STATISTICS __auto__ ON b FROM tab;
CREATE STATISTICS __auto__ ON a FROM tab;
CREATE STATISTICS __auto__ ON b FROM tab;
CREATE STATISTICS __auto__ ON a FROM tab;
CREATE STATISTICS __auto__ ON b FROM tab;
----

# The previous bug occurs when there are two or more forecasts since
# forecasts do not hava a statistics ID.
query-sql
SELECT count(1) FROM [ SHOW STATISTICS FOR TABLE tab WITH FORECAST ] WHERE statistics_name = '__forecast__'
----
2

exec-sql
BACKUP DATABASE db1 INTO 'nodelocal://0/test/'
----

0 comments on commit 9059e80

Please sign in to comment.