From cbe75264529e4aaad824d02ef36c22435d047f9e Mon Sep 17 00:00:00 2001 From: Michael Butler Date: Fri, 23 Sep 2022 10:20:09 -0400 Subject: [PATCH] backupccl: reintroduce previously offline tables with manifest.DescriptorChanges getReintroducedSpans finds all tables included in the current and previous backup that may have undergone a non-mvcc operation. The current backup will then back up these tables' spans from ts = 0, as the previous backup may have missed certain non-mvcc writes. To find these tables, getReintroducedSpans must find all tables covered in the previous backup that were in the offline state at the previous backup's start time. This function assumed that the previous backup's manifest.Descriptors field would contain all tables covered in the previous backup; however, while investigating #88042, we discovered that this assumption is not correct. During revision history backups, a table with an in-progress import (i.e. offline at backup time) can get backed up and included in manifest.DescriptorChanges but not in manifest.Descriptors. This means that getReintroducedSpans missed reintroducing spans from this table, which in turn means that backup chains have missed backing up some data. This patch fixes getReintroducedSpans to ensure it reintroduces tables included in manifest.DescriptorChanges whose last revision brought the table offline. 
Release note (bug fix): fix a part of Tech Advisory https://cockroachlabs.atlassian.net/browse/TSE-198 Release justification: bug fix --- pkg/ccl/backupccl/backup_planning.go | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pkg/ccl/backupccl/backup_planning.go b/pkg/ccl/backupccl/backup_planning.go index c1bca4d02914..73c5c7fe28c0 100644 --- a/pkg/ccl/backupccl/backup_planning.go +++ b/pkg/ccl/backupccl/backup_planning.go @@ -1023,8 +1023,23 @@ func getReintroducedSpans( ) ([]roachpb.Span, error) { reintroducedTables := make(map[descpb.ID]struct{}) + // First, create a map that indicates which tables from the previous backup + // were offline when the last backup was taken. To create this map, we must + // iterate two fields in the _last_ backup's manifest: + // + // 1. manifest.Descriptors contains a list of descriptors _explicitly_ + // included in the backup, gathered at backup startTime. + // + // 2. manifest.DescriptorChanges contains a list of descriptor changes tracked + // in the backup. While investigating #88042, it was discovered that + // during revision history backups, a table can get included in + // manifest.DescriptorChanges, causing its spans to get backed up, but _not_ + // in manifest.Descriptors. Therefore, to find all descriptors covered in the + // backup that were offline at backup time, we must find all tables in + // manifest.DescriptorChanges whose last change brought the table offline. offlineInLastBackup := make(map[descpb.ID]struct{}) lastBackup := prevBackups[len(prevBackups)-1] + for _, desc := range lastBackup.Descriptors { // TODO(pbardea): Also check that lastWriteTime is set once those are // populated on the table descriptor. 
@@ -1033,6 +1048,23 @@ func getReintroducedSpans( } } + latestTableDescChangeInLastBackup := make(map[descpb.ID]*descpb.TableDescriptor) + for _, rev := range lastBackup.DescriptorChanges { + if table, _, _, _, _ := descpb.FromDescriptor(rev.Desc); table != nil { + if trackedRev, ok := latestTableDescChangeInLastBackup[table.GetID()]; !ok { + latestTableDescChangeInLastBackup[table.GetID()] = table + } else if trackedRev.Version < table.Version { + latestTableDescChangeInLastBackup[table.GetID()] = table + } + } + } + + for _, table := range latestTableDescChangeInLastBackup { + if table.Offline() { + offlineInLastBackup[table.GetID()] = struct{}{} + } + } + // If the table was offline in the last backup, but becomes PUBLIC, then it // needs to be re-included since we may have missed non-transactional writes. tablesToReinclude := make([]catalog.TableDescriptor, 0)