Skip to content

Commit

Permalink
backupccl: move alloc heavy Files field from manifest to SST
Browse files Browse the repository at this point in the history
Repeated fields in a backup's manifest do not scale well as
the amount of backed up data, and the length of the incremental
chain of backup grows. This has been a problem we have been aware
of, and has motivated us to incrementally move all repeated fields
out of the manifest and into their standalone metadata SST files.
The advantage of this is that during incremental backups or restores
we do not need to perform large allocations when unmarshalling the
manifest, but instead stream results from the relevant SST as and
when we need them. In support issues such as
#93272 we have seen this
unmarshalling step results in OOMs thereby preventing further incremental
backups, or making the backups unrestoreable. Efforts for moving backup,
and restore to have all their metadata in SSTs and rely on streaming reads
and writes is ongoing but outside the scope of this patch.

This patch is meant to be a targetted fix with an eye for backports.
Past experimentation has shown us that the `Files` repeated field in the
manifest is the largest cause of bloated, unmarshalable manifests. This change
teaches backup to continue writing a manifest file, but a slimmer one
with the `Files` field nil'ed out. The values in the `Files` field are
instead written to an SST file that sits alongside the `SLIM_BACKUP_MANIFEST`.
To maintain mixed-version compatability with nodes that rely on a regular
manifest, we continue to write a `BACKUP_MANIFEST` alongside its slim version.
On the read path, we add an optimization that first reads the slim
manifest if present. This way we avoid unmarshalling the alloc heavy
`Files` field, and instead teach all the places in the code that need the
`Files` to reach out to the metadata SST and read the values one by one. To
support both the slim and not-so-slim manifests we introduce an interface
that iterates over the Files depending on the manifest passed to it.

To reiterate, this work is a subset of the improvements we will get
from moving all repeated fields to SSTs and is expected to be superseded
by those efforts when they come to fruition.

Fixes: #93272

Release note (performance improvement): long chains of incremental backups
and restore of such chains will now allocate less memory during the unmarshaling
of metadata
  • Loading branch information
adityamaru committed Jan 9, 2023
1 parent 6c619e2 commit 60f4783
Show file tree
Hide file tree
Showing 10 changed files with 535 additions and 89 deletions.
44 changes: 34 additions & 10 deletions pkg/ccl/backupccl/backup_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,16 +140,28 @@ func backup(
var lastCheckpoint time.Time

var completedSpans, completedIntroducedSpans []roachpb.Span
kmsEnv := backupencryption.MakeBackupKMSEnv(execCtx.ExecCfg().Settings,
&execCtx.ExecCfg().ExternalIODirConfig, execCtx.ExecCfg().DB, execCtx.User(),
execCtx.ExecCfg().InternalExecutor)
// TODO(benesch): verify these files, rather than accepting them as truth
// blindly.
// No concurrency yet, so these assignments are safe.
for _, file := range backupManifest.Files {
if file.StartTime.IsEmpty() && !file.EndTime.IsEmpty() {
completedIntroducedSpans = append(completedIntroducedSpans, file.Span)
it, err := makeBackupManifestFileIterator(ctx, execCtx.ExecCfg().DistSQLSrv.ExternalStorage,
*backupManifest, encryption, &kmsEnv)
if err != nil {
return roachpb.RowCount{}, err
}
defer it.close()
for f, hasNext := it.next(); hasNext; f, hasNext = it.next() {
if f.StartTime.IsEmpty() && !f.EndTime.IsEmpty() {
completedIntroducedSpans = append(completedIntroducedSpans, f.Span)
} else {
completedSpans = append(completedSpans, file.Span)
completedSpans = append(completedSpans, f.Span)
}
}
if it.err() != nil {
return roachpb.RowCount{}, it.err()
}

// Subtract out any completed spans.
spans := filterSpans(backupManifest.Spans, completedSpans)
Expand All @@ -172,10 +184,6 @@ func backup(
return roachpb.RowCount{}, errors.Wrap(err, "failed to determine nodes on which to run")
}

kmsEnv := backupencryption.MakeBackupKMSEnv(execCtx.ExecCfg().Settings,
&execCtx.ExecCfg().ExternalIODirConfig, execCtx.ExecCfg().DB, execCtx.User(),
execCtx.ExecCfg().InternalExecutor)

backupSpecs, err := distBackupPlanSpecs(
ctx,
planCtx,
Expand Down Expand Up @@ -319,11 +327,28 @@ func backup(
}
}

resumerSpan.RecordStructured(&types.StringValue{Value: "writing backup manifest"})
// Write a `BACKUP_MANIFEST` file to support backups in mixed-version clusters
// with 22.2 nodes.
//
// TODO(adityamaru): We can stop writing `BACKUP_MANIFEST` in 23.2
// because a mixed-version cluster with 23.1 nodes will read the
// `BACKUP_METADATA` instead.
if err := backupinfo.WriteBackupManifest(ctx, defaultStore, backupbase.BackupManifestName,
encryption, &kmsEnv, backupManifest); err != nil {
return roachpb.RowCount{}, err
}

// Write a `BACKUP_METADATA` file along with SSTs for all the alloc heavy
// fields elided from the `BACKUP_MANIFEST`.
//
// TODO(adityamaru,rhu713): Once backup/restore switches from writing and
// reading backup manifests to `metadata.sst` we can stop writing the slim
// manifest.
if err := backupinfo.WriteFilesListMetadataWithSSTs(ctx, defaultStore, encryption,
&kmsEnv, backupManifest); err != nil {
return roachpb.RowCount{}, err
}

var tableStatistics []*stats.TableStatisticProto
for i := range backupManifest.Descriptors {
if tbl, _, _, _, _ := descpb.GetDescriptors(&backupManifest.Descriptors[i]); tbl != nil {
Expand All @@ -350,7 +375,6 @@ func backup(
Statistics: tableStatistics,
}

resumerSpan.RecordStructured(&types.StringValue{Value: "writing backup table statistics"})
if err := backupinfo.WriteTableStatistics(ctx, defaultStore, encryption, &kmsEnv, &statsTable); err != nil {
return roachpb.RowCount{}, err
}
Expand Down
49 changes: 47 additions & 2 deletions pkg/ccl/backupccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8241,6 +8241,51 @@ func TestReadBackupManifestMemoryMonitoring(t *testing.T) {
mem.Close(ctx)
}

// TestIncorrectAccessOfFilesInBackupMetadata ensures that an accidental use of
// the `Files` field (instead of its dedicated SST) on the `BACKUP_METADATA`
// results in an error on restore and show.
func TestIncorrectAccessOfFilesInBackupMetadata(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

_, sqlDB, rawDir, cleanupFn := backupRestoreTestSetup(t, singleNode, 1, InitManualReplication)
defer cleanupFn()
sqlDB.Exec(t, `CREATE DATABASE r1`)
sqlDB.Exec(t, `CREATE TABLE r1.foo ( id INT PRIMARY KEY)`)
sqlDB.Exec(t, `INSERT INTO r1.foo VALUES (1)`)
sqlDB.Exec(t, `BACKUP DATABASE r1 INTO 'nodelocal://0/test'`)

// Load/deserialize the manifest so we can mess with it.
matches, err := filepath.Glob(filepath.Join(rawDir, "test", "*/*/*", backupbase.BackupMetadataName))
require.NoError(t, err)
require.Len(t, matches, 1)
manifestPath := matches[0]
manifestData, err := os.ReadFile(manifestPath)
require.NoError(t, err)
manifestData, err = backupinfo.DecompressData(context.Background(), nil, manifestData)
require.NoError(t, err)
var backupManifest backuppb.BackupManifest
require.NoError(t, protoutil.Unmarshal(manifestData, &backupManifest))

// The manifest should have `HasExternalFilesList` set to true.
require.True(t, backupManifest.HasExternalFilesList)

// Set it to false, so that any operation that resolves the metadata treats
// this manifest as a pre-23.1 BACKUP_MANIFEST, and directly accesses the
// `Files` field, instead of reading from the external SST.
backupManifest.HasExternalFilesList = false
manifestData, err = protoutil.Marshal(&backupManifest)
require.NoError(t, err)
require.NoError(t, os.WriteFile(manifestPath, manifestData, 0644 /* perm */))
// Also write the checksum file to match the new manifest.
checksum, err := backupinfo.GetChecksum(manifestData)
require.NoError(t, err)
require.NoError(t, os.WriteFile(manifestPath+backupinfo.BackupManifestChecksumSuffix, checksum, 0644 /* perm */))

// Expect an error on restore.
sqlDB.ExpectErr(t, "assertion: this placeholder legacy Files entry should never be opened", `RESTORE DATABASE r1 FROM LATEST IN 'nodelocal://0/test' WITH new_db_name = 'r2'`)
}

func TestManifestTooNew(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand All @@ -8254,7 +8299,7 @@ func TestManifestTooNew(t *testing.T) {
sqlDB.Exec(t, `DROP DATABASE r1`)

// Load/deserialize the manifest so we can mess with it.
manifestPath := filepath.Join(rawDir, "too_new", backupbase.BackupManifestName)
manifestPath := filepath.Join(rawDir, "too_new", backupbase.BackupMetadataName)
manifestData, err := os.ReadFile(manifestPath)
require.NoError(t, err)
manifestData, err = backupinfo.DecompressData(context.Background(), nil, manifestData)
Expand Down Expand Up @@ -8336,7 +8381,7 @@ func flipBitInManifests(t *testing.T, rawDir string) {
foundManifest := false
err := filepath.Walk(rawDir, func(path string, info os.FileInfo, err error) error {
log.Infof(context.Background(), "visiting %s", path)
if filepath.Base(path) == backupbase.BackupManifestName {
if filepath.Base(path) == backupbase.BackupMetadataName {
foundManifest = true
data, err := os.ReadFile(path)
require.NoError(t, err)
Expand Down
18 changes: 15 additions & 3 deletions pkg/ccl/backupccl/backupbase/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,25 @@ const (
// Also exported for testing backup inspection tooling.
DateBasedIntoFolderName = "/2006/01/02-150405.00"

// BackupOldManifestName is an old name for the serialized BackupManifest
// proto. It is used by 20.1 nodes and earlier.
//
// TODO(adityamaru): Remove this in 22.2 as part of disallowing backups
// from >1 major version in the past.
BackupOldManifestName = "BACKUP"

// BackupManifestName is the file name used for serialized BackupManifest
// protos.
//
// TODO(adityamaru): Remove in 23.2 since at that point all nodes will be
// writing a SlimBackupManifest instead.
BackupManifestName = "BACKUP_MANIFEST"

// BackupOldManifestName is an old name for the serialized BackupManifest
// proto. It is used by 20.1 nodes and earlier.
BackupOldManifestName = "BACKUP"
// BackupMetadataName is the file name used for serialized BackupManifest
// protos written by 23.1 nodes and later. This manifest has the alloc heavy
// Files repeated fields nil'ed out, and is used in conjunction with SSTs for
// each of those elided fields.
BackupMetadataName = "BACKUP_METADATA"

// DefaultIncrementalsSubdir is the default name of the subdirectory to which
// incremental backups will be written.
Expand Down
57 changes: 49 additions & 8 deletions pkg/ccl/backupccl/backupinfo/backup_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ import (
const (
// MetadataSSTName is the name of the SST file containing the backup metadata.
MetadataSSTName = "metadata.sst"
// BackupMetadataFilesListPath is the name of the SST file containing the
// BackupManifest_Files of the backup. This file is always written in
// conjunction with the `BACKUP_METADATA`.
BackupMetadataFilesListPath = "filelist.sst"
// FileInfoPath is the name of the SST file containing the
// BackupManifest_Files of the backup.
FileInfoPath = "fileinfo.sst"
Expand All @@ -55,6 +59,20 @@ var iterOpts = storage.IterOptions{
UpperBound: keys.MaxKey,
}

// WriteFilesListSST is responsible for constructing and writing the
// filePathInfo to dest. This file contains the `BackupManifest_Files` of the
// backup.
func WriteFilesListSST(
ctx context.Context,
dest cloud.ExternalStorage,
enc *jobspb.BackupEncryptionOptions,
kmsEnv cloud.KMSEnv,
manifest *backuppb.BackupManifest,
filePathInfo string,
) error {
return writeFilesSST(ctx, manifest, dest, enc, kmsEnv, filePathInfo)
}

// WriteBackupMetadataSST is responsible for constructing and writing the
// `metadata.sst` to dest. This file contains the metadata corresponding to this
// backup.
Expand Down Expand Up @@ -244,9 +262,8 @@ func writeDescsToMetadata(
return nil
}

func writeFilesToMetadata(
func writeFilesSST(
ctx context.Context,
sst storage.SSTWriter,
m *backuppb.BackupManifest,
dest cloud.ExternalStorage,
enc *jobspb.BackupEncryptionOptions,
Expand All @@ -267,12 +284,13 @@ func writeFilesToMetadata(
return cmp < 0 || (cmp == 0 && strings.Compare(m.Files[i].Path, m.Files[j].Path) < 0)
})

for _, i := range m.Files {
b, err := protoutil.Marshal(&i)
for i := range m.Files {
file := m.Files[i]
b, err := protoutil.Marshal(&file)
if err != nil {
return err
}
if err := fileSST.PutUnversioned(encodeFileSSTKey(i.Span.Key, i.Path), b); err != nil {
if err := fileSST.PutUnversioned(encodeFileSSTKey(file.Span.Key, file.Path), b); err != nil {
return err
}
}
Expand All @@ -281,11 +299,21 @@ func writeFilesToMetadata(
if err != nil {
return err
}
err = w.Close()
if err != nil {
return w.Close()
}

func writeFilesToMetadata(
ctx context.Context,
sst storage.SSTWriter,
m *backuppb.BackupManifest,
dest cloud.ExternalStorage,
enc *jobspb.BackupEncryptionOptions,
kmsEnv cloud.KMSEnv,
fileInfoPath string,
) error {
if err := writeFilesSST(ctx, m, dest, enc, kmsEnv, fileInfoPath); err != nil {
return err
}

// Write the file info into the main metadata SST.
return sst.PutUnversioned(encodeFilenameSSTKey(fileInfoPath), nil)
}
Expand Down Expand Up @@ -1030,6 +1058,19 @@ func (b *BackupMetadata) NewFileIter(ctx context.Context) (*FileIterator, error)
return &FileIterator{mergedIterator: iter}, nil
}

// NewFileSSTIter creates a new FileIterator to iterate over the storeFile.
// It is the caller's responsibility to Close() the returned iterator.
func NewFileSSTIter(
ctx context.Context, storeFile storageccl.StoreFile, encOpts *roachpb.FileEncryptionOptions,
) (*FileIterator, error) {
iter, err := storageccl.ExternalSSTReader(ctx, []storageccl.StoreFile{storeFile}, encOpts, iterOpts)
if err != nil {
return nil, err
}
iter.SeekGE(storage.MVCCKey{})
return &FileIterator{mergedIterator: iter}, nil
}

// Close closes the iterator.
func (fi *FileIterator) Close() {
fi.mergedIterator.Close()
Expand Down
86 changes: 76 additions & 10 deletions pkg/ccl/backupccl/backupinfo/manifest_handling.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,21 +153,46 @@ func ReadBackupManifestFromStore(
) (backuppb.BackupManifest, int64, error) {
ctx, sp := tracing.ChildSpan(ctx, "backupinfo.ReadBackupManifestFromStore")
defer sp.Finish()
backupManifest, memSize, err := ReadBackupManifest(ctx, mem, exportStore, backupbase.BackupManifestName,

manifest, memSize, err := ReadBackupManifest(ctx, mem, exportStore, backupbase.BackupMetadataName,
encryption, kmsEnv)
if err != nil {
oldManifest, newMemSize, newErr := ReadBackupManifest(ctx, mem, exportStore, backupbase.BackupOldManifestName,
encryption, kmsEnv)
if newErr != nil {
if !errors.Is(err, cloud.ErrFileDoesNotExist) {
return backuppb.BackupManifest{}, 0, err
}
backupManifest = oldManifest
memSize = newMemSize

// If we did not find `BACKUP_METADATA` we look for the
// `BACKUP_MANIFEST` file as it is possible the backup was created by a
// pre-23.1 node.
backupManifest, backupManifestMemSize, backupManifestErr := ReadBackupManifest(ctx, mem, exportStore,
backupbase.BackupManifestName, encryption, kmsEnv)
if backupManifestErr != nil {
if !errors.Is(backupManifestErr, cloud.ErrFileDoesNotExist) {
return backuppb.BackupManifest{}, 0, err
}

// If we did not find a `BACKUP_MANIFEST` we look for a `BACKUP` file as
// it is possible the backup was created by a pre-20.1 node.
//
// TODO(adityamaru): Remove this logic once we disallow restores beyond
// the binary upgrade compatibility window.
oldBackupManifest, oldBackupManifestMemSize, oldBackupManifestErr := ReadBackupManifest(ctx, mem, exportStore,
backupbase.BackupOldManifestName, encryption, kmsEnv)
if oldBackupManifestErr != nil {
return backuppb.BackupManifest{}, 0, oldBackupManifestErr
} else {
// We found a `BACKUP` manifest file.
manifest = oldBackupManifest
memSize = oldBackupManifestMemSize
}
} else {
// We found a `BACKUP_MANIFEST` file.
manifest = backupManifest
memSize = backupManifestMemSize
}
}
backupManifest.Dir = exportStore.Conf()
// TODO(dan): Sanity check this BackupManifest: non-empty EndTime, non-empty
// Paths, and non-overlapping Spans and keyranges in Files.
return backupManifest, memSize, nil
manifest.Dir = exportStore.Conf()
return manifest, memSize, nil
}

// compressData compresses data buffer and returns compressed
Expand Down Expand Up @@ -529,6 +554,47 @@ func WriteBackupLock(
return cloud.WriteFile(ctx, defaultStore, lockFileName, bytes.NewReader([]byte("lock")))
}

// WriteFilesListMetadataWithSSTs writes a "slim" version of manifest
// to `exportStore`. This version has the alloc heavy `Files` repeated field
// nil'ed out, and written to an accompanying SST instead.
func WriteFilesListMetadataWithSSTs(
ctx context.Context,
exportStore cloud.ExternalStorage,
encryption *jobspb.BackupEncryptionOptions,
kmsEnv cloud.KMSEnv,
manifest *backuppb.BackupManifest,
) error {
if err := writeFilesListMetadata(ctx, exportStore, backupbase.BackupMetadataName, encryption,
kmsEnv, manifest); err != nil {
return errors.Wrap(err, "failed to write the slim backup manifest")
}
return errors.Wrap(WriteFilesListSST(ctx, exportStore, encryption, kmsEnv, manifest,
BackupMetadataFilesListPath), "failed to write backup manfiest files SST")
}

// writeFilesListMetadata compresses and writes a slimmer version of the
// BackupManifest `desc` to `exportStore` with the `Files` field of the proto
// set to a bogus value that will error out on incorrect use.
func writeFilesListMetadata(
ctx context.Context,
exportStore cloud.ExternalStorage,
filename string,
encryption *jobspb.BackupEncryptionOptions,
kmsEnv cloud.KMSEnv,
manifest *backuppb.BackupManifest,
) error {
slimManifest := *manifest
// We write a bogus file entry to ensure that no call site incorrectly uses
// the `Files` field from the FilesListMetadata proto.
bogusFile := backuppb.BackupManifest_File{
Span: roachpb.Span{Key: keys.MinKey, EndKey: keys.MaxKey},
Path: "assertion: this placeholder legacy Files entry should never be opened",
}
slimManifest.Files = []backuppb.BackupManifest_File{bogusFile}
slimManifest.HasExternalFilesList = true
return WriteBackupManifest(ctx, exportStore, filename, encryption, kmsEnv, &slimManifest)
}

// WriteBackupManifest compresses and writes the passed in BackupManifest `desc`
// to `exportStore`.
func WriteBackupManifest(
Expand Down
Loading

0 comments on commit 60f4783

Please sign in to comment.