Skip to content

Commit

Permalink
Add retry logic for image copying (#63)
Browse files Browse the repository at this point in the history
* Add retry logic for image copying

* Remove unnecessary break

* Change to 5 retries

* Use proper format for prints

* Return empty byte array

* Add todo

(cherry picked from commit b7c9bbd)
  • Loading branch information
dymurray committed Jun 9, 2020
1 parent e95429a commit e0862f6
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 12 deletions.
6 changes: 3 additions & 3 deletions velero-plugins/migimagestream/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (p *BackupPlugin) Execute(item runtime.Unstructured, backup *v1.Backup) (ru
p.Log.Info(fmt.Sprintf("[is-backup] copying from: %s", srcPath))
p.Log.Info(fmt.Sprintf("[is-backup] copying to: %s", destPath))

imgManifest, err := copyImageBackup(srcPath, destPath)
imgManifest, err := copyImageBackup(p.Log, srcPath, destPath)
if err != nil {
p.Log.Info(fmt.Sprintf("[is-backup] Error copying image: %v", err))
return nil, nil, err
Expand Down Expand Up @@ -126,7 +126,7 @@ func findStatusTag(tags []imagev1API.NamedTagEventList, name string) *imagev1API
return nil
}

func copyImageBackup(src, dest string) ([]byte, error) {
func copyImageBackup(log logrus.FieldLogger, src, dest string) ([]byte, error) {
sourceCtx, err := internalRegistrySystemContext()
if err != nil {
return []byte{}, err
Expand All @@ -135,5 +135,5 @@ func copyImageBackup(src, dest string) ([]byte, error) {
if err != nil {
return []byte{}, err
}
return copyImage(src, dest, sourceCtx, destinationCtx)
return copyImage(log, src, dest, sourceCtx, destinationCtx)
}
6 changes: 3 additions & 3 deletions velero-plugins/migimagestream/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (p *RestorePlugin) Execute(input *velero.RestoreItemActionExecuteInput) (*v

p.Log.Info(fmt.Sprintf("[is-restore] copying from: %s", srcPath))
p.Log.Info(fmt.Sprintf("[is-restore] copying to: %s", destPath))
manifest, err := copyImageRestore(srcPath, destPath)
manifest, err := copyImageRestore(p.Log, srcPath, destPath)
if err != nil {
p.Log.Info(fmt.Sprintf("[is-restore] Error copying image: %v", err))
return nil, err
Expand All @@ -108,7 +108,7 @@ func (p *RestorePlugin) Execute(input *velero.RestoreItemActionExecuteInput) (*v
return velero.NewRestoreItemActionExecuteOutput(input.Item).WithoutRestore(), nil
}

func copyImageRestore(src, dest string) ([]byte, error) {
func copyImageRestore(log logrus.FieldLogger, src, dest string) ([]byte, error) {
sourceCtx, err := migrationRegistrySystemContext()
if err != nil {
return []byte{}, err
Expand All @@ -117,5 +117,5 @@ func copyImageRestore(src, dest string) ([]byte, error) {
if err != nil {
return []byte{}, err
}
return copyImage(src, dest, sourceCtx, destinationCtx)
return copyImage(log, src, dest, sourceCtx, destinationCtx)
}
33 changes: 27 additions & 6 deletions velero-plugins/migimagestream/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@ import (
"context"
"errors"
"fmt"
"strings"
"time"

"github.com/containers/image/v5/copy"
"github.com/containers/image/v5/signature"
"github.com/containers/image/v5/transports/alltransports"
"github.com/containers/image/v5/types"
imagev1API "github.com/openshift/api/image/v1"
"github.com/sirupsen/logrus"

"k8s.io/client-go/rest"
)

func copyImage(src, dest string, sourceCtx, destinationCtx *types.SystemContext) ([]byte, error) {
func copyImage(log logrus.FieldLogger, src, dest string, sourceCtx, destinationCtx *types.SystemContext) ([]byte, error) {
policyContext, err := getPolicyContext()
if err != nil {
return []byte{}, fmt.Errorf("Error loading trust policy: %v", err)
Expand All @@ -29,11 +32,29 @@ func copyImage(src, dest string, sourceCtx, destinationCtx *types.SystemContext)
if err != nil {
return []byte{}, fmt.Errorf("Invalid destination name %s: %v", dest, err)
}
manifest, err := copy.Image(context.Background(), policyContext, destRef, srcRef, &copy.Options{
SourceCtx: sourceCtx,
DestinationCtx: destinationCtx,
})
return manifest, err

// Let's retry the image copy up to 10 times
// Each retry will wait 5 seconds longer
// Let's log a warning if we encounter `blob unknown to registry`
// TODO: Change this to only retry on specific errors from image copy
retryWait := 5
log.Info(fmt.Sprintf("copying image: %s; will attempt up to 5 times...", src))
for i := 0; i < 4; i++ {
manifest, err := copy.Image(context.Background(), policyContext, destRef, srcRef, &copy.Options{
SourceCtx: sourceCtx,
DestinationCtx: destinationCtx,
})
if err == nil {
return manifest, nil
}
if strings.Contains(err.Error(), "blob unknown to registry") {
log.Warn(fmt.Sprintf("encountered `blob unknown to registry error` for image %s", src))
}
log.Info(fmt.Sprintf("attempt #%v failed, waiting %vs and then retrying", i, retryWait))
time.Sleep(time.Duration(retryWait) * time.Second)
retryWait += 5
}
return []byte{}, err
}

func getPolicyContext() (*signature.PolicyContext, error) {
Expand Down

0 comments on commit e0862f6

Please sign in to comment.