Skip to content

Commit

Permalink
Commit(): retry committing to local storage on storage.LayerUnknown
Browse files Browse the repository at this point in the history
If we get a storage.LayerUnknown error while committing to local
storage, guess that we tried to reuse a layer in an image that got
deleted in the brief window between when the commit logic decided that
it could reuse the layer, and when it actually tried to create the image
record.

It's tempting to try to resolve this in the image library, but at the
point where this error crops up, it may already have skipped pulling the
relevant layer blob, so the entire copy operation would have to be
restarted from the beginning anyway, and that's the retry package's job.

It's tempting to bake this into the common library's retry package, but
that would require it to "know" about containers/storage, and while in
practice most of its callers already depend on the storage library, that
may a bit much.

Signed-off-by: Nalin Dahyabhai <[email protected]>
  • Loading branch information
nalind committed Aug 14, 2024
1 parent b5765b1 commit 6d8c293
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 11 deletions.
33 changes: 22 additions & 11 deletions common.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package buildah

import (
"context"
"errors"
"io"
"path/filepath"
"time"
Expand All @@ -11,6 +12,7 @@ import (
cp "github.com/containers/image/v5/copy"
"github.com/containers/image/v5/docker"
"github.com/containers/image/v5/signature"
is "github.com/containers/image/v5/storage"
"github.com/containers/image/v5/types"
encconfig "github.com/containers/ocicrypt/config"
"github.com/containers/storage"
Expand Down Expand Up @@ -67,22 +69,31 @@ func getSystemContext(store storage.Store, defaults *types.SystemContext, signat
return sc
}

func retryCopyImage(ctx context.Context, policyContext *signature.PolicyContext, dest, src, registry types.ImageReference, copyOptions *cp.Options, maxRetries int, retryDelay time.Duration) ([]byte, error) {
func retryCopyImage(ctx context.Context, policyContext *signature.PolicyContext, maybeWrappedDest, maybeWrappedSrc, directDest types.ImageReference, copyOptions *cp.Options, maxRetries int, retryDelay time.Duration) ([]byte, error) {
return retryCopyImageWithOptions(ctx, policyContext, maybeWrappedDest, maybeWrappedSrc, directDest, copyOptions, maxRetries, retryDelay, true)
}

func retryCopyImageWithOptions(ctx context.Context, policyContext *signature.PolicyContext, maybeWrappedDest, maybeWrappedSrc, directDest types.ImageReference, copyOptions *cp.Options, maxRetries int, retryDelay time.Duration, retryOnLayerUnknown bool) ([]byte, error) {
var (
manifestBytes []byte
err error
lastErr error
)
err = retry.IfNecessary(ctx, func() error {
manifestBytes, err = cp.Image(ctx, policyContext, dest, src, copyOptions)
if registry != nil && registry.Transport().Name() != docker.Transport.Name() {
lastErr = err
return nil
}
manifestBytes, err = cp.Image(ctx, policyContext, maybeWrappedDest, maybeWrappedSrc, copyOptions)
return err
}, &retry.RetryOptions{MaxRetry: maxRetries, Delay: retryDelay})
if lastErr != nil {
err = lastErr
}
}, &retry.RetryOptions{MaxRetry: maxRetries, Delay: retryDelay, IsErrorRetryable: func(err error) bool {
if retryOnLayerUnknown && directDest.Transport().Name() == is.Transport.Name() && errors.Is(err, storage.ErrLayerUnknown) {
// we were trying to reuse a layer that belonged to an
// image that was deleted at just the right (worst
// possible) time? yeah, try again
return true
}
if directDest.Transport().Name() != docker.Transport.Name() {
// if we're not talking to a registry, then nah
return false
}
// hand it off to the default should-this-be-retried logic
return retry.IsErrorRetryable(err)
}})
return manifestBytes, err
}
168 changes: 168 additions & 0 deletions common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package buildah

import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"os"
"path/filepath"
"testing"
"time"

cp "github.com/containers/image/v5/copy"
"github.com/containers/image/v5/signature"
imageStorage "github.com/containers/image/v5/storage"
"github.com/containers/image/v5/transports/alltransports"
"github.com/containers/storage"
storageTypes "github.com/containers/storage/types"
digest "github.com/opencontainers/go-digest"
ispec "github.com/opencontainers/image-spec/specs-go"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

type testRetryCopyImageWrappedStore struct {
phantomImageID string
storage.Store
}

func (ts *testRetryCopyImageWrappedStore) CreateImage(id string, names []string, layer, metadata string, options *storage.ImageOptions) (*storage.Image, error) {
if id == ts.phantomImageID {
if img, err := ts.Store.Image(id); img != nil && err == nil {
// i'm another thread somewhere
if _, err := ts.Store.DeleteImage(id, true); err != nil {
return nil, err
}
}
}
return ts.Store.CreateImage(id, names, layer, metadata, options)
}

func TestRetryCopyImage(t *testing.T) {
ctx := context.TODO()

graphDriverName := os.Getenv("STORAGE_DRIVER")
if graphDriverName == "" {
graphDriverName = "vfs"
}
store, err := storage.GetStore(storageTypes.StoreOptions{
RunRoot: t.TempDir(),
GraphRoot: t.TempDir(),
GraphDriverName: graphDriverName,
})
require.NoError(t, err, "initializing storage")
t.Cleanup(func() { _, err := store.Shutdown(true); assert.NoError(t, err) })

// construct an "image" that can be pulled into local storage
var layerBuffer bytes.Buffer
tw := tar.NewWriter(&layerBuffer)
err = tw.WriteHeader(&tar.Header{
Name: "rootfile",
Typeflag: tar.TypeReg,
Size: 1234,
})
require.NoError(t, err, "writing header for archive")
_, err = tw.Write(make([]byte, 1234))
require.NoError(t, err, "writing empty file to archive")
require.NoError(t, tw.Close(), "finishing layer")
layerDigest := digest.Canonical.FromBytes(layerBuffer.Bytes())
imageConfig := v1.Image{
RootFS: v1.RootFS{
Type: "layers",
DiffIDs: []digest.Digest{layerDigest},
},
}
imageConfigBytes, err := json.Marshal(&imageConfig)
require.NoError(t, err, "marshalling image configuration blob")
imageConfigDigest := digest.Canonical.FromBytes(imageConfigBytes)
imageManifest := v1.Manifest{
Versioned: ispec.Versioned{
SchemaVersion: 2,
},
MediaType: v1.MediaTypeImageManifest,
Config: v1.Descriptor{
MediaType: v1.MediaTypeImageConfig,
Size: int64(len(imageConfigBytes)),
Digest: digest.FromBytes(imageConfigBytes),
},
Layers: []v1.Descriptor{
{
MediaType: v1.MediaTypeImageLayer,
Size: int64(layerBuffer.Len()),
Digest: layerDigest,
},
},
}
imageManifestBytes, err := json.Marshal(&imageManifest)
require.NoError(t, err, "marshalling image manifest")
imageManifestDigest := digest.Canonical.FromBytes(imageManifestBytes)

// write it to an oci layout
ociDir := t.TempDir()
blobbyDir := filepath.Join(ociDir, "blobs")
require.NoError(t, os.Mkdir(blobbyDir, 0o700))
blobDir := filepath.Join(blobbyDir, layerDigest.Algorithm().String())
require.NoError(t, os.Mkdir(blobDir, 0o700))
require.NoError(t, os.WriteFile(filepath.Join(blobDir, layerDigest.Encoded()), layerBuffer.Bytes(), 0o600), "writing layer")
require.NoError(t, os.WriteFile(filepath.Join(blobDir, imageConfigDigest.Encoded()), imageConfigBytes, 0o600), "writing image config")
require.NoError(t, os.WriteFile(filepath.Join(blobDir, imageManifestDigest.Encoded()), imageManifestBytes, 0o600), "writing manifest")
imageIndex := v1.Index{
Versioned: ispec.Versioned{
SchemaVersion: 2,
},
MediaType: v1.MediaTypeImageIndex,
Manifests: []v1.Descriptor{
{
MediaType: v1.MediaTypeImageManifest,
Digest: imageManifestDigest,
Size: int64(len(imageManifestBytes)),
},
},
}
imageIndexBytes, err := json.Marshal(&imageIndex)
require.NoError(t, err, "marshalling image index")
require.NoError(t, os.WriteFile(filepath.Join(ociDir, v1.ImageIndexFile), imageIndexBytes, 0o600), "writing image index")
imageLayout := v1.ImageLayout{
Version: v1.ImageLayoutVersion,
}
imageLayoutBytes, err := json.Marshal(&imageLayout)
require.NoError(t, err, "marshalling image layout")
require.NoError(t, os.WriteFile(filepath.Join(ociDir, v1.ImageLayoutFile), imageLayoutBytes, 0o600), "writing image layout")

// pull the image, twice, just to make sure nothing weird happens
srcRef, err := alltransports.ParseImageName("oci:" + ociDir)
require.NoError(t, err, "building reference to image layout")
destRef, err := imageStorage.Transport.NewStoreReference(store, nil, imageConfigDigest.Encoded())
require.NoError(t, err, "building reference to image in store")
policy, err := signature.NewPolicyFromFile("tests/policy.json")
require.NoError(t, err, "reading signature policy")
policyContext, err := signature.NewPolicyContext(policy)
require.NoError(t, err, "building policy context")
t.Cleanup(func() {
require.NoError(t, policyContext.Destroy(), "destroying policy context")
})
_, err = retryCopyImage(ctx, policyContext, destRef, srcRef, destRef, &cp.Options{}, 3, 1*time.Second)
require.NoError(t, err, "copying image")
_, err = retryCopyImage(ctx, policyContext, destRef, srcRef, destRef, &cp.Options{}, 3, 1*time.Second)
require.NoError(t, err, "copying image")

// now make something weird happen
wrappedStore := &testRetryCopyImageWrappedStore{
phantomImageID: imageConfigDigest.Encoded(),
Store: store,
}
wrappedDestRef, err := imageStorage.Transport.NewStoreReference(wrappedStore, nil, imageConfigDigest.Encoded())
require.NoError(t, err, "building wrapped reference")

// copy with retry-on-storage-layer-unknown = false: expect an error
// (if it succeeds, either the test is broken, or we can remove this
// case from the retry function)
_, err = retryCopyImageWithOptions(ctx, policyContext, wrappedDestRef, srcRef, wrappedDestRef, &cp.Options{}, 3, 1*time.Second, false)
require.ErrorIs(t, err, storage.ErrLayerUnknown, "copying image")

// copy with retry-on-storage-layer-unknown = true: expect no error
_, err = retryCopyImageWithOptions(ctx, policyContext, wrappedDestRef, srcRef, wrappedDestRef, &cp.Options{}, 3, 1*time.Second, true)
require.NoError(t, err, "copying image")
}

0 comments on commit 6d8c293

Please sign in to comment.