From 6d8c29374c5437b7a8986edb82032abe25118f52 Mon Sep 17 00:00:00 2001 From: Nalin Dahyabhai Date: Tue, 13 Aug 2024 16:04:29 -0400 Subject: [PATCH] Commit(): retry committing to local storage on storage.LayerUnknown If we get a storage.LayerUnknown error while committing to local storage, guess that we tried to reuse a layer in an image that got deleted in the brief window between when the commit logic decided that it could reuse the layer, and when it actually tried to create the image record. It's tempting to try to resolve this in the image library, but at the point where this error crops up, it may already have skipped pulling the relevant layer blob, so the entire copy operation would have to be restarted from the beginning anyway, and that's the retry package's job. It's tempting to bake this into the common library's retry package, but that would require it to "know" about containers/storage, and while in practice most of its callers already depend on the storage library, that may a bit much. Signed-off-by: Nalin Dahyabhai --- common.go | 33 ++++++---- common_test.go | 168 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+), 11 deletions(-) create mode 100644 common_test.go diff --git a/common.go b/common.go index 83e8c70e6c1..92a7cffff64 100644 --- a/common.go +++ b/common.go @@ -2,6 +2,7 @@ package buildah import ( "context" + "errors" "io" "path/filepath" "time" @@ -11,6 +12,7 @@ import ( cp "github.com/containers/image/v5/copy" "github.com/containers/image/v5/docker" "github.com/containers/image/v5/signature" + is "github.com/containers/image/v5/storage" "github.com/containers/image/v5/types" encconfig "github.com/containers/ocicrypt/config" "github.com/containers/storage" @@ -67,22 +69,31 @@ func getSystemContext(store storage.Store, defaults *types.SystemContext, signat return sc } -func retryCopyImage(ctx context.Context, policyContext *signature.PolicyContext, dest, src, registry types.ImageReference, copyOptions *cp.Options, maxRetries int, retryDelay time.Duration) ([]byte, error) { +func retryCopyImage(ctx context.Context, policyContext *signature.PolicyContext, maybeWrappedDest, maybeWrappedSrc, directDest types.ImageReference, copyOptions *cp.Options, maxRetries int, retryDelay time.Duration) ([]byte, error) { + return retryCopyImageWithOptions(ctx, policyContext, maybeWrappedDest, maybeWrappedSrc, directDest, copyOptions, maxRetries, retryDelay, true) +} + +func retryCopyImageWithOptions(ctx context.Context, policyContext *signature.PolicyContext, maybeWrappedDest, maybeWrappedSrc, directDest types.ImageReference, copyOptions *cp.Options, maxRetries int, retryDelay time.Duration, retryOnLayerUnknown bool) ([]byte, error) { var ( manifestBytes []byte err error - lastErr error ) err = retry.IfNecessary(ctx, func() error { - manifestBytes, err = cp.Image(ctx, policyContext, dest, src, copyOptions) - if registry != nil && registry.Transport().Name() != docker.Transport.Name() { - lastErr = err - return nil - } + manifestBytes, err = cp.Image(ctx, policyContext, maybeWrappedDest, maybeWrappedSrc, copyOptions) return err - }, &retry.RetryOptions{MaxRetry: maxRetries, Delay: retryDelay}) - if lastErr != nil { - err = lastErr - } + }, &retry.RetryOptions{MaxRetry: maxRetries, Delay: retryDelay, IsErrorRetryable: func(err error) bool { + if retryOnLayerUnknown && directDest.Transport().Name() == is.Transport.Name() && errors.Is(err, storage.ErrLayerUnknown) { + // we were trying to reuse a layer that belonged to an + // image that was deleted at just the right (worst + // possible) time? yeah, try again + return true + } + if directDest.Transport().Name() != docker.Transport.Name() { + // if we're not talking to a registry, then nah + return false + } + // hand it off to the default should-this-be-retried logic + return retry.IsErrorRetryable(err) + }}) return manifestBytes, err } diff --git a/common_test.go b/common_test.go new file mode 100644 index 00000000000..e4ba17960d1 --- /dev/null +++ b/common_test.go @@ -0,0 +1,168 @@ +package buildah + +import ( + "archive/tar" + "bytes" + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + cp "github.com/containers/image/v5/copy" + "github.com/containers/image/v5/signature" + imageStorage "github.com/containers/image/v5/storage" + "github.com/containers/image/v5/transports/alltransports" + "github.com/containers/storage" + storageTypes "github.com/containers/storage/types" + digest "github.com/opencontainers/go-digest" + ispec "github.com/opencontainers/image-spec/specs-go" + v1 "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type testRetryCopyImageWrappedStore struct { + phantomImageID string + storage.Store +} + +func (ts *testRetryCopyImageWrappedStore) CreateImage(id string, names []string, layer, metadata string, options *storage.ImageOptions) (*storage.Image, error) { + if id == ts.phantomImageID { + if img, err := ts.Store.Image(id); img != nil && err == nil { + // i'm another thread somewhere + if _, err := ts.Store.DeleteImage(id, true); err != nil { + return nil, err + } + } + } + return ts.Store.CreateImage(id, names, layer, metadata, options) +} + +func TestRetryCopyImage(t *testing.T) { + ctx := context.TODO() + + graphDriverName := os.Getenv("STORAGE_DRIVER") + if graphDriverName == "" { + graphDriverName = "vfs" + } + store, err := storage.GetStore(storageTypes.StoreOptions{ + RunRoot: t.TempDir(), + GraphRoot: t.TempDir(), + GraphDriverName: graphDriverName, + }) + require.NoError(t, err, "initializing storage") + t.Cleanup(func() { _, err := store.Shutdown(true); assert.NoError(t, err) }) + + // construct an "image" that can be pulled into local storage + var layerBuffer bytes.Buffer + tw := tar.NewWriter(&layerBuffer) + err = tw.WriteHeader(&tar.Header{ + Name: "rootfile", + Typeflag: tar.TypeReg, + Size: 1234, + }) + require.NoError(t, err, "writing header for archive") + _, err = tw.Write(make([]byte, 1234)) + require.NoError(t, err, "writing empty file to archive") + require.NoError(t, tw.Close(), "finishing layer") + layerDigest := digest.Canonical.FromBytes(layerBuffer.Bytes()) + imageConfig := v1.Image{ + RootFS: v1.RootFS{ + Type: "layers", + DiffIDs: []digest.Digest{layerDigest}, + }, + } + imageConfigBytes, err := json.Marshal(&imageConfig) + require.NoError(t, err, "marshalling image configuration blob") + imageConfigDigest := digest.Canonical.FromBytes(imageConfigBytes) + imageManifest := v1.Manifest{ + Versioned: ispec.Versioned{ + SchemaVersion: 2, + }, + MediaType: v1.MediaTypeImageManifest, + Config: v1.Descriptor{ + MediaType: v1.MediaTypeImageConfig, + Size: int64(len(imageConfigBytes)), + Digest: digest.FromBytes(imageConfigBytes), + }, + Layers: []v1.Descriptor{ + { + MediaType: v1.MediaTypeImageLayer, + Size: int64(layerBuffer.Len()), + Digest: layerDigest, + }, + }, + } + imageManifestBytes, err := json.Marshal(&imageManifest) + require.NoError(t, err, "marshalling image manifest") + imageManifestDigest := digest.Canonical.FromBytes(imageManifestBytes) + + // write it to an oci layout + ociDir := t.TempDir() + blobbyDir := filepath.Join(ociDir, "blobs") + require.NoError(t, os.Mkdir(blobbyDir, 0o700)) + blobDir := filepath.Join(blobbyDir, layerDigest.Algorithm().String()) + require.NoError(t, os.Mkdir(blobDir, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(blobDir, layerDigest.Encoded()), layerBuffer.Bytes(), 0o600), "writing layer") + require.NoError(t, os.WriteFile(filepath.Join(blobDir, imageConfigDigest.Encoded()), imageConfigBytes, 0o600), "writing image config") + require.NoError(t, os.WriteFile(filepath.Join(blobDir, imageManifestDigest.Encoded()), imageManifestBytes, 0o600), "writing manifest") + imageIndex := v1.Index{ + Versioned: ispec.Versioned{ + SchemaVersion: 2, + }, + MediaType: v1.MediaTypeImageIndex, + Manifests: []v1.Descriptor{ + { + MediaType: v1.MediaTypeImageManifest, + Digest: imageManifestDigest, + Size: int64(len(imageManifestBytes)), + }, + }, + } + imageIndexBytes, err := json.Marshal(&imageIndex) + require.NoError(t, err, "marshalling image index") + require.NoError(t, os.WriteFile(filepath.Join(ociDir, v1.ImageIndexFile), imageIndexBytes, 0o600), "writing image index") + imageLayout := v1.ImageLayout{ + Version: v1.ImageLayoutVersion, + } + imageLayoutBytes, err := json.Marshal(&imageLayout) + require.NoError(t, err, "marshalling image layout") + require.NoError(t, os.WriteFile(filepath.Join(ociDir, v1.ImageLayoutFile), imageLayoutBytes, 0o600), "writing image layout") + + // pull the image, twice, just to make sure nothing weird happens + srcRef, err := alltransports.ParseImageName("oci:" + ociDir) + require.NoError(t, err, "building reference to image layout") + destRef, err := imageStorage.Transport.NewStoreReference(store, nil, imageConfigDigest.Encoded()) + require.NoError(t, err, "building reference to image in store") + policy, err := signature.NewPolicyFromFile("tests/policy.json") + require.NoError(t, err, "reading signature policy") + policyContext, err := signature.NewPolicyContext(policy) + require.NoError(t, err, "building policy context") + t.Cleanup(func() { + require.NoError(t, policyContext.Destroy(), "destroying policy context") + }) + _, err = retryCopyImage(ctx, policyContext, destRef, srcRef, destRef, &cp.Options{}, 3, 1*time.Second) + require.NoError(t, err, "copying image") + _, err = retryCopyImage(ctx, policyContext, destRef, srcRef, destRef, &cp.Options{}, 3, 1*time.Second) + require.NoError(t, err, "copying image") + + // now make something weird happen + wrappedStore := &testRetryCopyImageWrappedStore{ + phantomImageID: imageConfigDigest.Encoded(), + Store: store, + } + wrappedDestRef, err := imageStorage.Transport.NewStoreReference(wrappedStore, nil, imageConfigDigest.Encoded()) + require.NoError(t, err, "building wrapped reference") + + // copy with retry-on-storage-layer-unknown = false: expect an error + // (if it succeeds, either the test is broken, or we can remove this + // case from the retry function) + _, err = retryCopyImageWithOptions(ctx, policyContext, wrappedDestRef, srcRef, wrappedDestRef, &cp.Options{}, 3, 1*time.Second, false) + require.ErrorIs(t, err, storage.ErrLayerUnknown, "copying image") + + // copy with retry-on-storage-layer-unknown = true: expect no error + _, err = retryCopyImageWithOptions(ctx, policyContext, wrappedDestRef, srcRef, wrappedDestRef, &cp.Options{}, 3, 1*time.Second, true) + require.NoError(t, err, "copying image") +}