From 0b66ed910a84d3d3f6258b4156634365f537f3d5 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Tue, 12 Mar 2024 11:17:09 +0800 Subject: [PATCH] This is an automated cherry-pick of #51578 Signed-off-by: ti-chi-bot --- br/pkg/restore/client.go | 10 ++++++---- br/pkg/restore/import.go | 4 ++++ br/pkg/utils/backoff.go | 11 +++++++---- br/pkg/utils/backoff_test.go | 16 +++++----------- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index 7720ab332deee..ceda83d857fa0 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -1454,12 +1454,14 @@ LOOPFORTABLE: rc.workerPool.ApplyOnErrorGroup(eg, func() error { filesGroups := getGroupFiles(filesReplica, rc.fileImporter.supportMultiIngest) for _, filesGroup := range filesGroups { - if importErr := func(fs []*backuppb.File) error { + if importErr := func(fs []*backuppb.File) (err error) { fileStart := time.Now() defer func() { - log.Info("import files done", logutil.Files(filesGroup), - zap.Duration("take", time.Since(fileStart))) - updateCh.Inc() + if err == nil { + log.Info("import files done", logutil.Files(filesGroup), + zap.Duration("take", time.Since(fileStart))) + updateCh.Inc() + } }() return rc.fileImporter.ImportSSTFiles(ectx, fs, rewriteRules, rc.cipher, rc.dom.Store().GetCodec().GetAPIVersion()) }(filesGroup); importErr != nil { diff --git a/br/pkg/restore/import.go b/br/pkg/restore/import.go index 8ed8778882878..2990d75fb309a 100644 --- a/br/pkg/restore/import.go +++ b/br/pkg/restore/import.go @@ -816,6 +816,10 @@ func (importer *FileImporter) downloadRawKVSST( func (importer *FileImporter) ingest( ctx context.Context, +<<<<<<< HEAD +======= + files []*backuppb.File, +>>>>>>> d604b069399 (br: stop log when full restore failed (#51578)) info *split.RegionInfo, downloadMetas []*import_sstpb.SSTMeta, ) error { diff --git a/br/pkg/utils/backoff.go b/br/pkg/utils/backoff.go index 
6b7aa7a127863..658a4d965d886 100644 --- a/br/pkg/utils/backoff.go +++ b/br/pkg/utils/backoff.go @@ -31,9 +31,9 @@ const ( backupSSTWaitInterval = 2 * time.Second backupSSTMaxWaitInterval = 3 * time.Second - resetTSRetryTime = 16 + resetTSRetryTime = 32 resetTSWaitInterval = 50 * time.Millisecond - resetTSMaxWaitInterval = 500 * time.Millisecond + resetTSMaxWaitInterval = 2 * time.Second resetTSRetryTimeExt = 600 resetTSWaitIntervalExt = 500 * time.Millisecond @@ -167,7 +167,6 @@ func NewBackupSSTBackoffer() Backoffer { } func (bo *importerBackoffer) NextBackoff(err error) time.Duration { - log.Warn("retry to import ssts", zap.Int("attempt", bo.attempt), zap.Error(err)) // we don't care storeID here. res := bo.errContext.HandleErrorMsg(err.Error(), 0) if res.Strategy == RetryStrategy { @@ -249,8 +248,12 @@ func (bo *pdReqBackoffer) NextBackoff(err error) time.Duration { bo.delayTime = 2 * bo.delayTime bo.attempt-- default: + // If the connection times out, the pd client cancels the context and returns a gRPC context-canceled error, + // so treat codes.Canceled as retryable too. + // Retrying the gRPC context-canceled error is safe, because canceling the parent context returns context.Canceled instead. + // For example, after `ectx` is canceled, pdClient.GetTS(ectx) returns context.Canceled rather than the gRPC context-canceled error. 
switch status.Code(e) { - case codes.DeadlineExceeded, codes.NotFound, codes.AlreadyExists, codes.PermissionDenied, codes.ResourceExhausted, codes.Aborted, codes.OutOfRange, codes.Unavailable, codes.DataLoss, codes.Unknown: + case codes.DeadlineExceeded, codes.Canceled, codes.NotFound, codes.AlreadyExists, codes.PermissionDenied, codes.ResourceExhausted, codes.Aborted, codes.OutOfRange, codes.Unavailable, codes.DataLoss, codes.Unknown: bo.delayTime = 2 * bo.delayTime bo.attempt-- default: diff --git a/br/pkg/utils/backoff_test.go b/br/pkg/utils/backoff_test.go index ac6582958fe7a..0280c88ee60b7 100644 --- a/br/pkg/utils/backoff_test.go +++ b/br/pkg/utils/backoff_test.go @@ -123,9 +123,12 @@ func TestPdBackoffWithRetryableError(t *testing.T) { if counter == 2 { return io.EOF } + if counter == 6 { + return context.Canceled + } return gRPCError }, backoffer) - require.Equal(t, 16, counter) + require.Equal(t, 7, counter) require.Equal(t, []error{ gRPCError, gRPCError, @@ -133,16 +136,7 @@ func TestPdBackoffWithRetryableError(t *testing.T) { gRPCError, gRPCError, gRPCError, - gRPCError, - gRPCError, - gRPCError, - gRPCError, - gRPCError, - gRPCError, - gRPCError, - gRPCError, - gRPCError, - gRPCError, + context.Canceled, }, multierr.Errors(err)) }