diff --git a/copy/copy.go b/copy/copy.go index 485db4d30a..1b431d64b3 100644 --- a/copy/copy.go +++ b/copy/copy.go @@ -654,7 +654,9 @@ func (c *copier) copyOneImage(ctx context.Context, policyContext *signature.Poli if err != nil { logrus.Debugf("Writing manifest using preferred type %s failed: %v", preferredManifestMIMEType, err) // … if it fails, _and_ the failure is because the manifest is rejected, we may have other options. - if _, isManifestRejected := errors.Cause(err).(types.ManifestTypeRejectedError); !isManifestRejected || len(otherManifestMIMETypeCandidates) == 0 { + _, isManifestRejected := errors.Cause(err).(types.ManifestTypeRejectedError) + _, isCompressionIncompatible := errors.Cause(err).(manifest.ManifestLayerCompressionIncompatibilityError) + if (!isManifestRejected && !isCompressionIncompatible) || len(otherManifestMIMETypeCandidates) == 0 { // We don’t have other options. // In principle the code below would handle this as well, but the resulting error message is fairly ugly. // Don’t bother the user with MIME types if we have no choice. @@ -896,7 +898,7 @@ func (ic *imageCopier) copyLayers(ctx context.Context) error { return nil } -// layerDigestsDiffer return true iff the digests in a and b differ (ignoring sizes and possible other fields) +// layerDigestsDiffer returns true iff the digests in a and b differ (ignoring sizes and possible other fields) func layerDigestsDiffer(a, b []types.BlobInfo) bool { if len(a) != len(b) { return true @@ -951,7 +953,7 @@ func (ic *imageCopier) copyUpdatedConfigAndManifest(ctx context.Context, instanc instanceDigest = &manifestDigest } if err := ic.c.dest.PutManifest(ctx, man, instanceDigest); err != nil { - return nil, "", errors.Wrap(err, "Error writing manifest") + return nil, "", errors.Wrapf(err, "Error writing manifest %q", string(man)) } return man, manifestDigest, nil } @@ -1390,7 +1392,7 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr } } - // This is fairly horrible: the writer from getOriginalLayerCopyWriter wants to consumer + // This is fairly horrible: the writer from getOriginalLayerCopyWriter wants to consume // all of the input (to compute DiffIDs), even if dest.PutBlob does not need it. // So, read everything from originalLayerReader, which will cause the rest to be // sent there if we are not already at EOF. diff --git a/directory/directory_dest.go b/directory/directory_dest.go index 2b81c83600..f13a8de9e2 100644 --- a/directory/directory_dest.go +++ b/directory/directory_dest.go @@ -8,6 +8,7 @@ import ( "path/filepath" "runtime" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/types" "github.com/opencontainers/go-digest" "github.com/pkg/errors" @@ -194,7 +195,9 @@ func (d *dirImageDestination) PutBlob(ctx context.Context, stream io.Reader, inp // (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree). // info.Digest must not be empty. // If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input. -// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size. 
+// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may +// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be +// reflected in the manifest that will be written. // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. func (d *dirImageDestination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { @@ -202,15 +205,28 @@ func (d *dirImageDestination) TryReusingBlob(ctx context.Context, info types.Blo return false, types.BlobInfo{}, errors.Errorf(`"Can not check for a blob with unknown digest`) } blobPath := d.ref.layerPath(info.Digest) - finfo, err := os.Stat(blobPath) + + f, err := os.Open(blobPath) if err != nil && os.IsNotExist(err) { return false, types.BlobInfo{}, nil } if err != nil { return false, types.BlobInfo{}, err } - return true, types.BlobInfo{Digest: info.Digest, Size: finfo.Size()}, nil + defer f.Close() + + finfo, err := f.Stat() + if err != nil { + return false, types.BlobInfo{}, err + } + + compressionOperation, compressionAlgorithm, _, _, err := blobinfocache.DetectCompressionFormat(f) + if err != nil { + logrus.Debugf("error detecting compression used for %q, ignoring it", blobPath) + return false, types.BlobInfo{}, nil + } + return true, types.BlobInfo{Digest: info.Digest, Size: finfo.Size(), CompressionOperation: compressionOperation, CompressionAlgorithm: compressionAlgorithm}, nil } // PutManifest writes manifest to the destination. diff --git a/docker/docker_image_dest.go b/docker/docker_image_dest.go index ac63ac121f..7de01ca252 100644 --- a/docker/docker_image_dest.go +++ b/docker/docker_image_dest.go @@ -15,10 +15,12 @@ import ( "strings" "github.com/containers/image/v5/docker/reference" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/internal/iolimits" "github.com/containers/image/v5/internal/uploadreader" "github.com/containers/image/v5/manifest" "github.com/containers/image/v5/pkg/blobinfocache/none" + "github.com/containers/image/v5/pkg/compression" "github.com/containers/image/v5/types" "github.com/docker/distribution/registry/api/errcode" v2 "github.com/docker/distribution/registry/api/v2" @@ -162,6 +164,13 @@ func (d *dockerImageDestination) PutBlob(ctx context.Context, stream io.Reader, digester := digest.Canonical.Digester() sizeCounter := &sizeCounter{} + + _, _, compressorName, possiblyCompressedStream, err := blobinfocache.DetectCompressionFormat(stream) + if err != nil { + return types.BlobInfo{}, errors.Wrap(err, "Error checking if input blob is compressed") + } + stream = possiblyCompressedStream + uploadLocation, err = func() (*url.URL, error) { // A scope for defer uploadReader := uploadreader.NewUploadReader(io.TeeReader(stream, io.MultiWriter(digester.Hash(), sizeCounter))) // This error text should never be user-visible, we terminate only after makeRequestToResolvedURL @@ -204,7 +213,8 @@ func (d *dockerImageDestination) PutBlob(ctx context.Context, stream io.Reader, } logrus.Debugf("Upload of layer %s complete", computedDigest) - cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), computedDigest, newBICLocationReference(d.ref)) + bic := 
blobinfocache.FromBlobInfoCache(cache) + bic.RecordKnownLocation2(d.ref.Transport(), bicTransportScope(d.ref), computedDigest, compressorName, newBICLocationReference(d.ref)) return types.BlobInfo{Digest: computedDigest, Size: sizeCounter.size}, nil } @@ -291,6 +301,7 @@ func (d *dockerImageDestination) TryReusingBlob(ctx context.Context, info types. if info.Digest == "" { return false, types.BlobInfo{}, errors.Errorf(`"Can not check for a blob with unknown digest`) } + bic := blobinfocache.FromBlobInfoCache(cache) // First, check whether the blob happens to already exist at the destination. exists, size, err := d.blobExists(ctx, d.ref.ref, info.Digest, nil) @@ -298,18 +309,31 @@ func (d *dockerImageDestination) TryReusingBlob(ctx context.Context, info types. return false, types.BlobInfo{}, err } if exists { - cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, newBICLocationReference(d.ref)) - return true, types.BlobInfo{Digest: info.Digest, Size: size}, nil + compressorName := blobinfocache.Uncompressed + if info.CompressionAlgorithm != nil { + compressorName = info.CompressionAlgorithm.Name() + } + bic.RecordKnownLocation2(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, compressorName, newBICLocationReference(d.ref)) + return true, types.BlobInfo{Digest: info.Digest, MediaType: info.MediaType, Size: size}, nil } // Then try reusing blobs from other locations. - for _, candidate := range cache.CandidateLocations(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, canSubstitute) { + candidates := bic.CandidateLocations2(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, canSubstitute) + for _, candidate := range candidates { + if candidate.CompressorName == blobinfocache.UnknownCompression { + continue + } + candidateRepo, err := parseBICLocationReference(candidate.Location) if err != nil { logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err) continue } - logrus.Debugf("Trying to reuse cached location %s in %s", candidate.Digest.String(), candidateRepo.Name()) + if candidate.CompressorName != blobinfocache.Uncompressed { + logrus.Debugf("Trying to reuse cached location %s compressed with %s in %s", candidate.Digest.String(), candidate.CompressorName, candidateRepo.Name()) + } else { + logrus.Debugf("Trying to reuse cached location %s with no compression in %s", candidate.Digest.String(), candidateRepo.Name()) + } // Sanity checks: if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) { @@ -351,8 +375,24 @@ func (d *dockerImageDestination) TryReusingBlob(ctx context.Context, info types. continue } } - cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), candidate.Digest, newBICLocationReference(d.ref)) - return true, types.BlobInfo{Digest: candidate.Digest, Size: size}, nil + + bic.RecordKnownLocation2(d.ref.Transport(), bicTransportScope(d.ref), candidate.Digest, candidate.CompressorName, newBICLocationReference(d.ref)) + + var compressionOperation types.LayerCompression + var compressionAlgorithm *compression.Algorithm + if candidate.CompressorName != blobinfocache.Uncompressed { + algorithm, err := compression.AlgorithmByName(candidate.CompressorName) + if err != nil { + logrus.Debugf("... 
unknown compressor %q", candidate.CompressorName) + return false, types.BlobInfo{}, nil + } + compressionOperation = types.Compress + compressionAlgorithm = &algorithm + } else { + compressionOperation = types.Decompress + compressionAlgorithm = &compression.Algorithm{} + } + return true, types.BlobInfo{Digest: candidate.Digest, MediaType: info.MediaType, Size: size, CompressionOperation: compressionOperation, CompressionAlgorithm: compressionAlgorithm}, nil } return false, types.BlobInfo{}, nil diff --git a/docker/docker_image_src.go b/docker/docker_image_src.go index 70ca7661e2..eadc0f0f13 100644 --- a/docker/docker_image_src.go +++ b/docker/docker_image_src.go @@ -13,6 +13,7 @@ import ( "strings" "github.com/containers/image/v5/docker/reference" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/internal/iolimits" "github.com/containers/image/v5/manifest" "github.com/containers/image/v5/pkg/sysregistriesv2" @@ -287,8 +288,20 @@ func (s *dockerImageSource) GetBlob(ctx context.Context, info types.BlobInfo, ca if err := httpResponseToError(res, "Error fetching blob"); err != nil { return nil, 0, err } - cache.RecordKnownLocation(s.physicalRef.Transport(), bicTransportScope(s.physicalRef), info.Digest, newBICLocationReference(s.physicalRef)) - return res.Body, getBlobSize(res), nil + _, _, compressorName, bodyReader, err := blobinfocache.DetectCompressionFormat(res.Body) + if err != nil { + return nil, 0, err + } + bic := blobinfocache.FromBlobInfoCache(cache) + bic.RecordKnownLocation2(s.physicalRef.Transport(), bicTransportScope(s.physicalRef), info.Digest, compressorName, newBICLocationReference(s.physicalRef)) + rc := struct { + io.Reader + io.Closer + }{ + Reader: bodyReader, + Closer: res.Body, + } + return &rc, getBlobSize(res), nil } // GetSignatures returns the image's signatures. It may use a remote (= slow) service. diff --git a/docker/tarfile/dest.go b/docker/tarfile/dest.go index e16829d965..4f2465cac4 100644 --- a/docker/tarfile/dest.go +++ b/docker/tarfile/dest.go @@ -86,7 +86,9 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t // (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree). // info.Digest must not be empty. // If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input. -// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size. +// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may +// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be +// reflected in the manifest that will be written. // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. 
func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) {
diff --git a/internal/blobinfocache/blobinfocache.go b/internal/blobinfocache/blobinfocache.go
new file mode 100644
index 0000000000..a740741521
--- /dev/null
+++ b/internal/blobinfocache/blobinfocache.go
@@ -0,0 +1,93 @@
+package blobinfocache
+
+import (
+	"io"
+
+	"github.com/containers/image/v5/pkg/compression"
+	"github.com/containers/image/v5/types"
+	digest "github.com/opencontainers/go-digest"
+)
+
+// FromBlobInfoCache2 returns a full BlobInfoCache2 based on a BlobInfoCache2WithoutV1, returning
+// the original object if it already implements BlobInfoCache2, or a wrapper which adds the V1
+// methods for storing and retrieving location information, discarding the compression data that
+// those methods cannot express.
+func FromBlobInfoCache2(bic BlobInfoCache2WithoutV1) BlobInfoCache2 {
+	if bic2, ok := bic.(BlobInfoCache2); ok {
+		return bic2
+	}
+	return &wrappedBlobInfoCache2{
+		BlobInfoCache2WithoutV1: bic,
+	}
+}
+
+type wrappedBlobInfoCache2 struct {
+	BlobInfoCache2WithoutV1
+}
+
+func (bic *wrappedBlobInfoCache2) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, location types.BICLocationReference) {
+	bic.RecordKnownLocation2(transport, scope, digest, UnknownCompression, location)
+}
+
+func (bic *wrappedBlobInfoCache2) CandidateLocations(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, canSubstitute bool) []types.BICReplacementCandidate {
+	newCandidates := bic.CandidateLocations2(transport, scope, digest, canSubstitute)
+	results := make([]types.BICReplacementCandidate, 0, len(newCandidates))
+	for _, c := range newCandidates {
+		results = append(results, types.BICReplacementCandidate{
+			Digest:   c.Digest,
+			Location: c.Location,
+		})
+	}
+	return results
+}
+
+// FromBlobInfoCache returns a BlobInfoCache2 based on a BlobInfoCache, returning the original
+// object if it implements BlobInfoCache2, or a wrapper which discards compression information
+// if it only implements BlobInfoCache.
+func FromBlobInfoCache(bic types.BlobInfoCache) BlobInfoCache2 { + if bic2, ok := bic.(BlobInfoCache2); ok { + return bic2 + } + return &wrappedBlobInfoCache{ + BlobInfoCache: bic, + } +} + +type wrappedBlobInfoCache struct { + types.BlobInfoCache +} + +func (bic *wrappedBlobInfoCache) RecordKnownLocation2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, compressorName string, location types.BICLocationReference) { + bic.RecordKnownLocation(transport, scope, digest, location) +} + +func (bic *wrappedBlobInfoCache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, canSubstitute bool) []BICReplacementCandidate2 { + oldCandidates := bic.CandidateLocations(transport, scope, digest, canSubstitute) + results := make([]BICReplacementCandidate2, 0, len(oldCandidates)) + for _, c := range oldCandidates { + results = append(results, BICReplacementCandidate2{ + Digest: c.Digest, + Location: c.Location, + CompressorName: UnknownCompression, + }) + } + return results +} + +// DetectCompressionFormat wraps pkg/compression.DetectCompressionFormat and returns: +// compressionOperation: always either Compress or Decompress, PreserveOriginal on error +// compressionAlgorithm: either the specified algorithm if Compress, or nil if Decompress +// compressorName: compressionAlgorithm.Name() if Compress, Uncompressed if Decompress +// reader: as with pkg/compression.DetectCompressionFormat +// err: set on error +func DetectCompressionFormat(input io.Reader) (compressionOperation types.LayerCompression, compressionAlgorithm *compression.Algorithm, compressorName string, reader io.Reader, err error) { + algo, _, reader, err := compression.DetectCompressionFormat(input) + if err != nil { + return types.PreserveOriginal, nil, UnknownCompression, nil, err + } + if name := algo.Name(); name != "" { + return types.Compress, &algo, name, reader, nil + } else { + return types.Decompress, nil, Uncompressed, reader, nil + } +} diff --git a/internal/blobinfocache/blobinfocache_test.go b/internal/blobinfocache/blobinfocache_test.go new file mode 100644 index 0000000000..0f1f576451 --- /dev/null +++ b/internal/blobinfocache/blobinfocache_test.go @@ -0,0 +1,51 @@ +package blobinfocache + +import ( + "archive/tar" + "bytes" + "testing" + + "github.com/containers/image/v5/pkg/compression" + "github.com/containers/image/v5/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDetectCompressionFormat(t *testing.T) { + cases := []struct { + compressor *compression.Algorithm + compressionOperation types.LayerCompression + compressorName string + }{ + {nil, types.Decompress, Uncompressed}, + {&compression.Gzip, types.Compress, compression.Gzip.Name()}, + {&compression.Zstd, types.Compress, compression.Zstd.Name()}, + {&compression.Xz, types.Compress, compression.Xz.Name()}, + } + for i, c := range cases { + var tw *tar.Writer + closeFn := func() error { return nil } + buf := bytes.Buffer{} + if c.compressor != nil { + wc, err := compression.CompressStream(&buf, *c.compressor, nil) + require.Nilf(t, err, "%d: compression.CompressStream(%q) failed", i, c.compressor.Name()) + tw = tar.NewWriter(wc) + closeFn = wc.Close + } else { + tw = tar.NewWriter(&buf) + } + err := tw.Close() + assert.Nilf(t, err, "%d: tarWriter.Close()", i) + err = closeFn() + assert.Nilf(t, err, "%d: closing compressor writer", i) + op, compressor, compressorName, _, err := 
DetectCompressionFormat(&buf)
+		assert.Nilf(t, err, "%d: DetectCompressionFormat", i)
+		assert.Equalf(t, c.compressionOperation, op, "%d: unexpected compression operation", i)
+		assert.Equalf(t, c.compressorName, compressorName, "%d: unexpected compressor name", i)
+		if compressor != nil {
+			assert.Equalf(t, c.compressorName, compressor.Name(), "%d: unexpected compressor", i)
+		} else {
+			assert.Equalf(t, c.compressorName, Uncompressed, "%d: expected uncompressed", i)
+		}
+	}
+}
diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go
new file mode 100644
index 0000000000..341b1e6340
--- /dev/null
+++ b/internal/blobinfocache/types.go
@@ -0,0 +1,63 @@
+package blobinfocache
+
+import (
+	"github.com/containers/image/v5/types"
+	digest "github.com/opencontainers/go-digest"
+)
+
+const (
+	// Uncompressed is the value we store in a blob info cache to indicate that we know that the blob in
+	// the corresponding location is not compressed.
+	Uncompressed = "uncompressed"
+	// UnknownCompression is the value we store in a blob info cache to indicate that we don't know if the
+	// blob in the corresponding location is compressed or not.
+	UnknownCompression = "unknown"
+)
+
+// BICReplacementCandidate2 is an item returned by BlobInfoCache2.CandidateLocations2.
+type BICReplacementCandidate2 struct {
+	Digest         digest.Digest
+	CompressorName string // either the Name() of a known pkg/compression.Algorithm, or Uncompressed or UnknownCompression
+	Location       types.BICLocationReference
+}
+
+// BlobInfoCache2 extends BlobInfoCache by adding the ability to track information about what kind
+// of compression was applied to the blobs it keeps information about.
+type BlobInfoCache2 interface {
+	types.BlobInfoCache
+	blobInfoCache2Additions
+}
+
+// blobInfoCacheCommon contains methods which are common to both BlobInfoCache and BlobInfoCache2:
+// methods which store and retrieve mappings of uncompressed and possibly-compressed digests.
+type blobInfoCacheCommon interface {
+	UncompressedDigest(anyDigest digest.Digest) digest.Digest
+	RecordDigestUncompressedPair(anyDigest digest.Digest, uncompressed digest.Digest)
+}
+
+// BlobInfoCache2WithoutV1 contains extended versions of the BlobInfoCache methods which add the
+// ability to track information about what kind of compression was applied to the blobs it keeps
+// information about, and the same methods for managing mappings of digests.
+type BlobInfoCache2WithoutV1 interface {
+	blobInfoCacheCommon
+	blobInfoCache2Additions
+}
+
+// blobInfoCache2Additions contains extended versions of the BlobInfoCache methods which add the
+// ability to track information about what kind of compression was applied to the blobs it keeps
+// information about.
+type blobInfoCache2Additions interface {
+	// RecordKnownLocation2 records that a blob with the specified digest exists within the
+	// specified (transport, scope) scope, compressed with the specified compression algorithm
+	// (one recognized by pkg/compression.AlgorithmByName()), or Uncompressed or with
+	// UnknownCompression, and can be reused given the opaque location data.
+	RecordKnownLocation2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, compressorName string, location types.BICLocationReference)
+	// CandidateLocations2 returns a prioritized, limited, number of blobs and their locations
+	// that could possibly be reused within the specified (transport, scope) (if they still
+	// exist, which is not guaranteed).
+	//
+	// If !canSubstitute, the returned candidates will match the submitted digest exactly; if
+	// canSubstitute, data from previous RecordDigestUncompressedPair calls is used to also look
+	// up variants of the blob which have the same uncompressed digest.
+	CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, canSubstitute bool) []BICReplacementCandidate2
+}
diff --git a/manifest/common.go b/manifest/common.go
index fa2b39e0ea..e0c7b1f9bd 100644
--- a/manifest/common.go
+++ b/manifest/common.go
@@ -5,7 +5,6 @@ import (
 	"github.com/containers/image/v5/pkg/compression"
 	"github.com/containers/image/v5/types"
-	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -70,15 +69,15 @@ func compressionVariantMIMEType(variantTable []compressionMIMETypeSet, mimeType
 					return res, nil
 				}
 				if name != mtsUncompressed {
-					return "", fmt.Errorf("%s compression is not supported", name)
+					return "", ManifestLayerCompressionIncompatibilityError{fmt.Sprintf("%s compression is not supported for type %q", name, mt)}
 				}
-				return "", errors.New("uncompressed variant is not supported")
+				return "", ManifestLayerCompressionIncompatibilityError{fmt.Sprintf("uncompressed variant is not supported for type %q", mt)}
 			}
 			if name != mtsUncompressed {
 				return "", fmt.Errorf("unknown compression algorithm %s", name)
 			}
 			// We can't very well say “the idea of no compression is unknown”
-			return "", errors.New("uncompressed variant is not supported")
+			return "", ManifestLayerCompressionIncompatibilityError{fmt.Sprintf("uncompressed variant is not supported for type %q", mt)}
 		}
 	}
 }
@@ -99,6 +98,12 @@ func updatedMIMEType(variantTable []compressionMIMETypeSet, mimeType string, upd
 	// {de}compressed.
 	switch updated.CompressionOperation {
 	case types.PreserveOriginal:
+		// Force a change to the media type if we're being told to use a particular compressor,
+		// since it might be different from the one associated with the media type. Otherwise,
+		// try to keep the original media type.
+		if updated.CompressionAlgorithm != nil && updated.CompressionAlgorithm.Name() != mtsUncompressed {
+			return compressionVariantMIMEType(variantTable, mimeType, updated.CompressionAlgorithm)
+		}
 		// Keep the original media type.
 		return mimeType, nil
 
@@ -116,3 +121,12 @@ func updatedMIMEType(variantTable []compressionMIMETypeSet, mimeType string, upd
 		return "", fmt.Errorf("unknown compression operation (%d)", updated.CompressionOperation)
 	}
 }
+
+// ManifestLayerCompressionIncompatibilityError indicates that a manifest type has no media type that can express the requested layer compression.
+type ManifestLayerCompressionIncompatibilityError struct {
+	text string
+}
+
+func (m ManifestLayerCompressionIncompatibilityError) Error() string {
+	return m.text
+}
diff --git a/oci/archive/oci_dest.go b/oci/archive/oci_dest.go
index 23d4713252..c874eb775c 100644
--- a/oci/archive/oci_dest.go
+++ b/oci/archive/oci_dest.go
@@ -103,7 +103,9 @@ func (d *ociArchiveImageDestination) PutBlob(ctx context.Context, stream io.Read
 // (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
 // info.Digest must not be empty.
 // If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input.
-// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size.
+// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may +// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be +// reflected in the manifest that will be written. // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. func (d *ociArchiveImageDestination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { diff --git a/oci/layout/oci_dest.go b/oci/layout/oci_dest.go index 0c88e1ef0c..6286a60d5f 100644 --- a/oci/layout/oci_dest.go +++ b/oci/layout/oci_dest.go @@ -9,6 +9,7 @@ import ( "path/filepath" "runtime" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/manifest" "github.com/containers/image/v5/types" digest "github.com/opencontainers/go-digest" @@ -186,7 +187,9 @@ func (d *ociImageDestination) PutBlob(ctx context.Context, stream io.Reader, inp // (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree). // info.Digest must not be empty. // If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input. -// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size. +// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may +// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be +// reflected in the manifest that will be written. // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. func (d *ociImageDestination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { @@ -197,14 +200,26 @@ func (d *ociImageDestination) TryReusingBlob(ctx context.Context, info types.Blo if err != nil { return false, types.BlobInfo{}, err } - finfo, err := os.Stat(blobPath) + f, err := os.Open(blobPath) if err != nil && os.IsNotExist(err) { return false, types.BlobInfo{}, nil } if err != nil { return false, types.BlobInfo{}, err } - return true, types.BlobInfo{Digest: info.Digest, Size: finfo.Size()}, nil + defer f.Close() + + finfo, err := f.Stat() + if err != nil { + return false, types.BlobInfo{}, err + } + + compressionOperation, compressionAlgorithm, _, _, err := blobinfocache.DetectCompressionFormat(f) + if err != nil { + return false, types.BlobInfo{}, nil + } + + return true, types.BlobInfo{Digest: info.Digest, Size: finfo.Size(), CompressionOperation: compressionOperation, CompressionAlgorithm: compressionAlgorithm}, nil } // PutManifest writes a manifest to the destination. Per our list of supported manifest MIME types, diff --git a/openshift/openshift.go b/openshift/openshift.go index c4c84dd545..426046e662 100644 --- a/openshift/openshift.go +++ b/openshift/openshift.go @@ -410,7 +410,9 @@ func (d *openshiftImageDestination) PutBlob(ctx context.Context, stream io.Reade // (e.g. 
if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree). // info.Digest must not be empty. // If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input. -// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size. +// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may +// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be +// reflected in the manifest that will be written. // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. func (d *openshiftImageDestination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { diff --git a/ostree/ostree_dest.go b/ostree/ostree_dest.go index b518122e29..c91a49c57a 100644 --- a/ostree/ostree_dest.go +++ b/ostree/ostree_dest.go @@ -339,7 +339,9 @@ func (d *ostreeImageDestination) importConfig(repo *otbuiltin.Repo, blob *blobTo // (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree). // info.Digest must not be empty. // If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input. -// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size. +// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may +// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be +// reflected in the manifest that will be written. // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. func (d *ostreeImageDestination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go index 200dab5934..35b120fae9 100644 --- a/pkg/blobinfocache/boltdb/boltdb.go +++ b/pkg/blobinfocache/boltdb/boltdb.go @@ -7,6 +7,7 @@ import ( "sync" "time" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/pkg/blobinfocache/internal/prioritize" "github.com/containers/image/v5/types" "github.com/opencontainers/go-digest" @@ -28,6 +29,9 @@ var ( // knownLocationsBucket stores a nested structure of buckets, keyed by (transport name, scope string, blob digest), ultimately containing // a bucket of (opaque location reference, BinaryMarshaller-encoded time.Time value). knownLocationsBucket = []byte("knownLocations") + // knownCompressionBucket stores a nested structure of buckets, keyed by (transport name, scope string, blob digest), ultimately containing + // a bucket of (opaque location reference, compressor-name-as-string value). 
+ knownCompressionBucket = []byte("knownCompression") ) // Concurrency: @@ -95,7 +99,7 @@ type cache struct { // // Most users should call blobinfocache.DefaultCache instead. func New(path string) types.BlobInfoCache { - return &cache{path: path} + return blobinfocache.FromBlobInfoCache2(&cache{path: path}) } // view returns runs the specified fn within a read-only transaction on the database. @@ -220,9 +224,9 @@ func (bdc *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } -// RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope, -// and can be reused given the opaque location data. -func (bdc *cache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, location types.BICLocationReference) { +// RecordKnownLocation2 records that a blob with the specified digest exists within the specified (transport, scope) scope, +// compressed with the specified compression algorithm, and can be reused given the opaque location data. +func (bdc *cache) RecordKnownLocation2(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, compressorName string, location types.BICLocationReference) { _ = bdc.update(func(tx *bolt.Tx) error { b, err := tx.CreateBucketIfNotExists(knownLocationsBucket) if err != nil { @@ -247,25 +251,59 @@ func (bdc *cache) RecordKnownLocation(transport types.ImageTransport, scope type if err := b.Put([]byte(location.Opaque), value); err != nil { // Possibly overwriting an older entry. return err } + + b, err = tx.CreateBucketIfNotExists(knownCompressionBucket) + if err != nil { + return err + } + b, err = b.CreateBucketIfNotExists([]byte(transport.Name())) + if err != nil { + return err + } + b, err = b.CreateBucketIfNotExists([]byte(scope.Opaque)) + if err != nil { + return err + } + b, err = b.CreateBucketIfNotExists([]byte(blobDigest.String())) + if err != nil { + return err + } + if compressorName != blobinfocache.UnknownCompression { + if err := b.Put([]byte(location.Opaque), []byte(compressorName)); err != nil { // Possibly overwriting an older entry. + return err + } + } return nil }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } // appendReplacementCandiates creates prioritize.CandidateWithTime values for digest in scopeBucket, and returns the result of appending them to candidates. 
-func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, scopeBucket *bolt.Bucket, digest digest.Digest) []prioritize.CandidateWithTime { - b := scopeBucket.Bucket([]byte(digest.String())) - if b == nil { +func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, dateBucket, compressionBucket *bolt.Bucket, digest digest.Digest) []prioritize.CandidateWithTime { + digestDateBucket := dateBucket.Bucket([]byte(digest.String())) + if digestDateBucket == nil { return candidates } - _ = b.ForEach(func(k, v []byte) error { + if compressionBucket != nil { + // compressionBucket won't have been created if previous writers never recorded info about compression, + // and we don't want to fail just because of that + compressionBucket = compressionBucket.Bucket([]byte(digest.String())) + } + _ = digestDateBucket.ForEach(func(k, v []byte) error { t := time.Time{} if err := t.UnmarshalBinary(v); err != nil { return err } + compressorName := blobinfocache.UnknownCompression + if compressionBucket != nil { + if compressorNameValue := compressionBucket.Get(k); len(compressorNameValue) > 0 { + compressorName = string(compressorNameValue) + } + } candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: types.BICReplacementCandidate{ - Digest: digest, - Location: types.BICLocationReference{Opaque: string(k)}, + Candidate: blobinfocache.BICReplacementCandidate2{ + Digest: digest, + CompressorName: compressorName, + Location: types.BICLocationReference{Opaque: string(k)}, }, LastSeen: t, }) @@ -274,30 +312,39 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW return candidates } -// CandidateLocations returns a prioritized, limited, number of blobs and their locations that could possibly be reused +// CandidateLocations2 returns a prioritized, limited, number of blobs and their locations that could possibly be reused // within the specified (transport scope) (if they still exist, which is not guaranteed). // // If !canSubstitute, the returned cadidates will match the submitted digest exactly; if canSubstitute, // data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same // uncompressed digest. 
-func (bdc *cache) CandidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []types.BICReplacementCandidate { +func (bdc *cache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 { res := []prioritize.CandidateWithTime{} var uncompressedDigestValue digest.Digest // = "" if err := bdc.view(func(tx *bolt.Tx) error { - scopeBucket := tx.Bucket(knownLocationsBucket) - if scopeBucket == nil { + dateBucket := tx.Bucket(knownLocationsBucket) + if dateBucket == nil { return nil } - scopeBucket = scopeBucket.Bucket([]byte(transport.Name())) - if scopeBucket == nil { + dateBucket = dateBucket.Bucket([]byte(transport.Name())) + if dateBucket == nil { return nil } - scopeBucket = scopeBucket.Bucket([]byte(scope.Opaque)) - if scopeBucket == nil { + dateBucket = dateBucket.Bucket([]byte(scope.Opaque)) + if dateBucket == nil { return nil } + // compressionBucket won't have been created if previous writers never recorded info about compression, + // and we don't want to fail just because of that + compressionBucket := tx.Bucket(knownCompressionBucket) + if compressionBucket != nil { + compressionBucket = compressionBucket.Bucket([]byte(transport.Name())) + if compressionBucket != nil { + compressionBucket = compressionBucket.Bucket([]byte(scope.Opaque)) + } + } - res = bdc.appendReplacementCandidates(res, scopeBucket, primaryDigest) + res = bdc.appendReplacementCandidates(res, dateBucket, compressionBucket, primaryDigest) if canSubstitute { if uncompressedDigestValue = bdc.uncompressedDigest(tx, primaryDigest); uncompressedDigestValue != "" { b := tx.Bucket(digestByUncompressedBucket) @@ -310,7 +357,7 @@ func (bdc *cache) CandidateLocations(transport types.ImageTransport, scope types return err } if d != primaryDigest && d != uncompressedDigestValue { - res = bdc.appendReplacementCandidates(res, scopeBucket, d) + res = bdc.appendReplacementCandidates(res, dateBucket, compressionBucket, d) } return nil }); err != nil { @@ -319,13 +366,13 @@ func (bdc *cache) CandidateLocations(transport types.ImageTransport, scope types } } if uncompressedDigestValue != primaryDigest { - res = bdc.appendReplacementCandidates(res, scopeBucket, uncompressedDigestValue) + res = bdc.appendReplacementCandidates(res, dateBucket, compressionBucket, uncompressedDigestValue) } } } return nil }); err != nil { // Including os.IsNotExist(err) - return []types.BICReplacementCandidate{} // FIXME? Log err (but throttle the log volume on repeated accesses)? + return []blobinfocache.BICReplacementCandidate2{} // FIXME? Log err (but throttle the log volume on repeated accesses)? 
} return prioritize.DestructivelyPrioritizeReplacementCandidates(res, primaryDigest, uncompressedDigestValue) diff --git a/pkg/blobinfocache/boltdb/boltdb_test.go b/pkg/blobinfocache/boltdb/boltdb_test.go index 079ea280f4..6f370ae64e 100644 --- a/pkg/blobinfocache/boltdb/boltdb_test.go +++ b/pkg/blobinfocache/boltdb/boltdb_test.go @@ -6,19 +6,21 @@ import ( "path/filepath" "testing" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/pkg/blobinfocache/internal/test" - "github.com/containers/image/v5/types" "github.com/stretchr/testify/require" ) -func newTestCache(t *testing.T) (types.BlobInfoCache, func(t *testing.T)) { +var _ blobinfocache.BlobInfoCache2WithoutV1 = &cache{} + +func newTestCache(t *testing.T) (blobinfocache.BlobInfoCache2, func(t *testing.T)) { // We need a separate temporary directory here, because bolt.Open(…, &bolt.Options{Readonly:true}) can't deal with // an existing but empty file, and incorrectly fails without releasing the lock - which in turn causes // any future writes to hang. Creating a temporary directory allows us to use a path to a // non-existent file, thus replicating the expected conditions for creating a new DB. dir, err := ioutil.TempDir("", "boltdb") require.NoError(t, err) - return New(filepath.Join(dir, "db")), func(t *testing.T) { + return blobinfocache.FromBlobInfoCache(New(filepath.Join(dir, "db"))), func(t *testing.T) { err = os.RemoveAll(dir) require.NoError(t, err) } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 5deca4a82d..6f5506d94c 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -6,7 +6,7 @@ import ( "sort" "time" - "github.com/containers/image/v5/types" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/opencontainers/go-digest" ) @@ -17,8 +17,8 @@ const replacementAttempts = 5 // CandidateWithTime is the input to types.BICReplacementCandidate prioritization. type CandidateWithTime struct { - Candidate types.BICReplacementCandidate // The replacement candidate - LastSeen time.Time // Time the candidate was last known to exist (either read or written) + Candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate + LastSeen time.Time // Time the candidate was last known to exist (either read or written) } // candidateSortState is a local state implementing sort.Interface on candidates to prioritize, @@ -79,7 +79,7 @@ func (css *candidateSortState) Swap(i, j int) { // destructivelyPrioritizeReplacementCandidatesWithMax is destructivelyPrioritizeReplacementCandidates with a parameter for the // number of entries to limit, only to make testing simpler. -func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest, maxCandidates int) []types.BICReplacementCandidate { +func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest, maxCandidates int) []blobinfocache.BICReplacementCandidate2 { // We don't need to use sort.Stable() because nanosecond timestamps are (presumably?) unique, so no two elements should // compare equal. 
sort.Sort(&candidateSortState{ @@ -92,7 +92,7 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, if resLength > maxCandidates { resLength = maxCandidates } - res := make([]types.BICReplacementCandidate, resLength) + res := make([]blobinfocache.BICReplacementCandidate2, resLength) for i := range res { res[i] = cs[i].Candidate } @@ -105,6 +105,6 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, // // WARNING: The array of candidates is destructively modified. (The implementation of this function could of course // make a copy, but all CandidateLocations implementations build the slice of candidates only for the single purpose of calling this function anyway.) -func DestructivelyPrioritizeReplacementCandidates(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest) []types.BICReplacementCandidate { +func DestructivelyPrioritizeReplacementCandidates(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest) []blobinfocache.BICReplacementCandidate2 { return destructivelyPrioritizeReplacementCandidatesWithMax(cs, primaryDigest, uncompressedDigest, replacementAttempts) } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index 7e4257d60d..6f7aadcc3f 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/types" "github.com/opencontainers/go-digest" "github.com/stretchr/testify/assert" @@ -21,20 +22,20 @@ var ( // cssLiteral contains a non-trivial candidateSortState shared among several tests below. 
cssLiteral = candidateSortState{ cs: []CandidateWithTime{ - {types.BICReplacementCandidate{Digest: digestCompressedA, Location: types.BICLocationReference{Opaque: "A1"}}, time.Unix(1, 0)}, - {types.BICReplacementCandidate{Digest: digestUncompressed, Location: types.BICLocationReference{Opaque: "U2"}}, time.Unix(1, 1)}, - {types.BICReplacementCandidate{Digest: digestCompressedA, Location: types.BICLocationReference{Opaque: "A2"}}, time.Unix(1, 1)}, - {types.BICReplacementCandidate{Digest: digestCompressedPrimary, Location: types.BICLocationReference{Opaque: "P1"}}, time.Unix(1, 0)}, - {types.BICReplacementCandidate{Digest: digestCompressedB, Location: types.BICLocationReference{Opaque: "B1"}}, time.Unix(1, 1)}, - {types.BICReplacementCandidate{Digest: digestCompressedPrimary, Location: types.BICLocationReference{Opaque: "P2"}}, time.Unix(1, 1)}, - {types.BICReplacementCandidate{Digest: digestCompressedB, Location: types.BICLocationReference{Opaque: "B2"}}, time.Unix(2, 0)}, - {types.BICReplacementCandidate{Digest: digestUncompressed, Location: types.BICLocationReference{Opaque: "U1"}}, time.Unix(1, 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestCompressedA, Location: types.BICLocationReference{Opaque: "A1"}}, time.Unix(1, 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestUncompressed, Location: types.BICLocationReference{Opaque: "U2"}}, time.Unix(1, 1)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestCompressedA, Location: types.BICLocationReference{Opaque: "A2"}}, time.Unix(1, 1)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestCompressedPrimary, Location: types.BICLocationReference{Opaque: "P1"}}, time.Unix(1, 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestCompressedB, Location: types.BICLocationReference{Opaque: "B1"}}, time.Unix(1, 1)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestCompressedPrimary, Location: types.BICLocationReference{Opaque: "P2"}}, time.Unix(1, 1)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestCompressedB, Location: types.BICLocationReference{Opaque: "B2"}}, time.Unix(2, 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: digestUncompressed, Location: types.BICLocationReference{Opaque: "U1"}}, time.Unix(1, 0)}, }, primaryDigest: digestCompressedPrimary, uncompressedDigest: digestUncompressed, } // cssExpectedReplacementCandidates is the fully-sorted, unlimited, result of prioritizing cssLiteral. 
- cssExpectedReplacementCandidates = []types.BICReplacementCandidate{ + cssExpectedReplacementCandidates = []blobinfocache.BICReplacementCandidate2{ {Digest: digestCompressedPrimary, Location: types.BICLocationReference{Opaque: "P2"}}, {Digest: digestCompressedPrimary, Location: types.BICLocationReference{Opaque: "P1"}}, {Digest: digestCompressedB, Location: types.BICLocationReference{Opaque: "B2"}}, @@ -74,8 +75,8 @@ func TestCandidateSortStateLess(t *testing.T) { caseName := fmt.Sprintf("%s %v", c.name, tms) css := candidateSortState{ cs: []CandidateWithTime{ - {types.BICReplacementCandidate{Digest: c.d0, Location: types.BICLocationReference{Opaque: "L0"}}, time.Unix(tms[0], 0)}, - {types.BICReplacementCandidate{Digest: c.d1, Location: types.BICLocationReference{Opaque: "L1"}}, time.Unix(tms[1], 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: c.d0, Location: types.BICLocationReference{Opaque: "L0"}}, time.Unix(tms[0], 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: c.d1, Location: types.BICLocationReference{Opaque: "L1"}}, time.Unix(tms[1], 0)}, }, primaryDigest: digestCompressedPrimary, uncompressedDigest: digestUncompressed, @@ -113,8 +114,8 @@ func TestCandidateSortStateLess(t *testing.T) { } { css := candidateSortState{ cs: []CandidateWithTime{ - {types.BICReplacementCandidate{Digest: c.p0.d, Location: types.BICLocationReference{Opaque: "L0"}}, time.Unix(c.p0.t, 0)}, - {types.BICReplacementCandidate{Digest: c.p1.d, Location: types.BICLocationReference{Opaque: "L1"}}, time.Unix(c.p1.t, 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: c.p0.d, Location: types.BICLocationReference{Opaque: "L0"}}, time.Unix(c.p0.t, 0)}, + {blobinfocache.BICReplacementCandidate2{Digest: c.p1.d, Location: types.BICLocationReference{Opaque: "L1"}}, time.Unix(c.p1.t, 0)}, }, primaryDigest: digestCompressedPrimary, uncompressedDigest: digestUncompressed, diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go index b415e9c40e..478e0083a6 100644 --- a/pkg/blobinfocache/internal/test/test.go +++ b/pkg/blobinfocache/internal/test/test.go @@ -4,6 +4,7 @@ package test import ( "testing" + "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/internal/testing/mocks" "github.com/containers/image/v5/types" digest "github.com/opencontainers/go-digest" @@ -16,19 +17,25 @@ const ( digestCompressedA = digest.Digest("sha256:3333333333333333333333333333333333333333333333333333333333333333") digestCompressedB = digest.Digest("sha256:4444444444444444444444444444444444444444444444444444444444444444") digestCompressedUnrelated = digest.Digest("sha256:5555555555555555555555555555555555555555555555555555555555555555") + compressorNameU = "compressorName/U" + compressorNameA = "compressorName/A" + compressorNameB = "compressorName/B" + compressorNameCU = "compressorName/CU" ) // GenericCache runs an implementation-independent set of tests, given a // newTestCache, which can be called repeatedly and always returns a (cache, cleanup callback) pair -func GenericCache(t *testing.T, newTestCache func(t *testing.T) (types.BlobInfoCache, func(t *testing.T))) { +func GenericCache(t *testing.T, newTestCache func(t *testing.T) (blobinfocache.BlobInfoCache2, func(t *testing.T))) { for _, s := range []struct { name string - fn func(t *testing.T, cache types.BlobInfoCache) + fn func(t *testing.T, cache blobinfocache.BlobInfoCache2) }{ {"UncompressedDigest", testGenericUncompressedDigest}, {"RecordDigestUncompressedPair", 
testGenericRecordDigestUncompressedPair}, {"RecordKnownLocations", testGenericRecordKnownLocations}, {"CandidateLocations", testGenericCandidateLocations}, + {"RecordKnownLocations2", testGenericRecordKnownLocations2}, + {"CandidateLocations2", testGenericCandidateLocations2}, } { t.Run(s.name, func(t *testing.T) { cache, cleanup := newTestCache(t) @@ -38,7 +45,7 @@ func GenericCache(t *testing.T, newTestCache func(t *testing.T) (types.BlobInfoC } } -func testGenericUncompressedDigest(t *testing.T, cache types.BlobInfoCache) { +func testGenericUncompressedDigest(t *testing.T, cache blobinfocache.BlobInfoCache2) { // Nothing is known. assert.Equal(t, digest.Digest(""), cache.UncompressedDigest(digestUnknown)) @@ -55,7 +62,7 @@ func testGenericUncompressedDigest(t *testing.T, cache types.BlobInfoCache) { assert.Equal(t, digestCompressedUnrelated, cache.UncompressedDigest(digestCompressedUnrelated)) } -func testGenericRecordDigestUncompressedPair(t *testing.T, cache types.BlobInfoCache) { +func testGenericRecordDigestUncompressedPair(t *testing.T, cache blobinfocache.BlobInfoCache2) { for i := 0; i < 2; i++ { // Record the same data twice to ensure redundant writes don’t break things. // Known compressed→uncompressed mapping cache.RecordDigestUncompressedPair(digestCompressedA, digestUncompressed) @@ -70,7 +77,7 @@ func testGenericRecordDigestUncompressedPair(t *testing.T, cache types.BlobInfoC } } -func testGenericRecordKnownLocations(t *testing.T, cache types.BlobInfoCache) { +func testGenericRecordKnownLocations(t *testing.T, cache blobinfocache.BlobInfoCache2) { transport := mocks.NameImageTransport("==BlobInfocache transport mock") for i := 0; i < 2; i++ { // Record the same data twice to ensure redundant writes don’t break things. for _, scopeName := range []string{"A", "B"} { // Run the test in two different scopes to verify they don't affect each other. @@ -84,6 +91,33 @@ func testGenericRecordKnownLocations(t *testing.T, cache types.BlobInfoCache) { {Digest: digest, Location: lr1}, {Digest: digest, Location: lr2}, }, cache.CandidateLocations(transport, scope, digest, false)) + assert.Equal(t, []blobinfocache.BICReplacementCandidate2{ + {Digest: digest, CompressorName: blobinfocache.UnknownCompression, Location: lr1}, + {Digest: digest, CompressorName: blobinfocache.UnknownCompression, Location: lr2}, + }, cache.CandidateLocations2(transport, scope, digest, false)) + } + } + } +} + +func testGenericRecordKnownLocations2(t *testing.T, cache blobinfocache.BlobInfoCache2) { + transport := mocks.NameImageTransport("==BlobInfocache transport mock") + for i := 0; i < 2; i++ { // Record the same data twice to ensure redundant writes don’t break things. + for _, scopeName := range []string{"A", "B"} { // Run the test in two different scopes to verify they don't affect each other. + scope := types.BICTransportScope{Opaque: scopeName} + for _, digest := range []digest.Digest{digestCompressedA, digestCompressedB} { // Two different digests should not affect each other either. 
+ lr1 := types.BICLocationReference{Opaque: scopeName + "1"} + lr2 := types.BICLocationReference{Opaque: scopeName + "2"} + cache.RecordKnownLocation2(transport, scope, digest, "type2", lr2) + cache.RecordKnownLocation2(transport, scope, digest, "type1", lr1) + assert.Equal(t, []types.BICReplacementCandidate{ + {Digest: digest, Location: lr1}, + {Digest: digest, Location: lr2}, + }, cache.CandidateLocations(transport, scope, digest, false)) + assert.Equal(t, []blobinfocache.BICReplacementCandidate2{ + {Digest: digest, CompressorName: "type1", Location: lr1}, + {Digest: digest, CompressorName: "type2", Location: lr2}, + }, cache.CandidateLocations2(transport, scope, digest, false)) } } } @@ -92,6 +126,7 @@ func testGenericRecordKnownLocations(t *testing.T, cache types.BlobInfoCache) { // candidate is a shorthand for types.BICReplacementCandiddate type candidate struct { d digest.Digest + cn string lr string } @@ -103,7 +138,15 @@ func assertCandidatesMatch(t *testing.T, scopeName string, expected []candidate, assert.Equal(t, e, actual) } -func testGenericCandidateLocations(t *testing.T, cache types.BlobInfoCache) { +func assertCandidatesMatch2(t *testing.T, scopeName string, expected []candidate, actual []blobinfocache.BICReplacementCandidate2) { + e := make([]blobinfocache.BICReplacementCandidate2, len(expected)) + for i, ev := range expected { + e[i] = blobinfocache.BICReplacementCandidate2{Digest: ev.d, CompressorName: ev.cn, Location: types.BICLocationReference{Opaque: scopeName + ev.lr}} + } + assert.Equal(t, e, actual) +} + +func testGenericCandidateLocations(t *testing.T, cache blobinfocache.BlobInfoCache2) { transport := mocks.NameImageTransport("==BlobInfocache transport mock") cache.RecordDigestUncompressedPair(digestCompressedA, digestUncompressed) cache.RecordDigestUncompressedPair(digestCompressedB, digestUncompressed) @@ -164,6 +207,70 @@ func testGenericCandidateLocations(t *testing.T, cache types.BlobInfoCache) { assertCandidatesMatch(t, scopeName, []candidate{ {d: digestCompressedUnrelated, lr: "CU1"}, {d: digestCompressedUnrelated, lr: "CU2"}, }, cache.CandidateLocations(transport, scope, digestCompressedUnrelated, true)) + } +} + +func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCache2) { + transport := mocks.NameImageTransport("==BlobInfocache transport mock") + cache.RecordDigestUncompressedPair(digestCompressedA, digestUncompressed) + cache.RecordDigestUncompressedPair(digestCompressedB, digestUncompressed) + cache.RecordDigestUncompressedPair(digestUncompressed, digestUncompressed) + digestNameSet := []struct { + n string + d digest.Digest + m string + }{ + {"U", digestUncompressed, compressorNameU}, + {"A", digestCompressedA, compressorNameA}, + {"B", digestCompressedB, compressorNameB}, + {"CU", digestCompressedUnrelated, compressorNameCU}, + } + + for _, scopeName := range []string{"A", "B"} { // Run the test in two different scopes to verify they don't affect each other. + scope := types.BICTransportScope{Opaque: scopeName} + // Nothing is known. 
+		assert.Equal(t, []blobinfocache.BICReplacementCandidate2{}, cache.CandidateLocations2(transport, scope, digestUnknown, false))
+		assert.Equal(t, []blobinfocache.BICReplacementCandidate2{}, cache.CandidateLocations2(transport, scope, digestUnknown, true))
+		// Record "2" entries before "1" entries; then results should sort "1" (more recent) before "2" (older).
+		for _, suffix := range []string{"2", "1"} {
+			for _, e := range digestNameSet {
+				cache.RecordKnownLocation2(transport, scope, e.d, e.m, types.BICLocationReference{Opaque: scopeName + e.n + suffix})
+			}
+		}
+
+		// No substitutions allowed:
+		for _, e := range digestNameSet {
+			assertCandidatesMatch2(t, scopeName, []candidate{
+				{d: e.d, cn: e.m, lr: e.n + "1"}, {d: e.d, cn: e.m, lr: e.n + "2"},
+			}, cache.CandidateLocations2(transport, scope, e.d, false))
+		}
+
+		// With substitutions: the original digest is always preferred, then other compressed variants, then the uncompressed one.
+		assertCandidatesMatch2(t, scopeName, []candidate{
+			{d: digestCompressedA, cn: compressorNameA, lr: "A1"}, {d: digestCompressedA, cn: compressorNameA, lr: "A2"},
+			{d: digestCompressedB, cn: compressorNameB, lr: "B1"}, {d: digestCompressedB, cn: compressorNameB, lr: "B2"},
+			{d: digestUncompressed, cn: compressorNameU, lr: "U1"}, // Beyond the replacementAttempts limit: {d: digestUncompressed, cn: compressorNameU, lr: "U2"},
+		}, cache.CandidateLocations2(transport, scope, digestCompressedA, true))
+
+		assertCandidatesMatch2(t, scopeName, []candidate{
+			{d: digestCompressedB, cn: compressorNameB, lr: "B1"}, {d: digestCompressedB, cn: compressorNameB, lr: "B2"},
+			{d: digestCompressedA, cn: compressorNameA, lr: "A1"}, {d: digestCompressedA, cn: compressorNameA, lr: "A2"},
+			{d: digestUncompressed, cn: compressorNameU, lr: "U1"}, // Beyond the replacementAttempts limit: {d: digestUncompressed, cn: compressorNameU, lr: "U2"},
+		}, cache.CandidateLocations2(transport, scope, digestCompressedB, true))
+
+		assertCandidatesMatch2(t, scopeName, []candidate{
+			{d: digestUncompressed, cn: compressorNameU, lr: "U1"}, {d: digestUncompressed, cn: compressorNameU, lr: "U2"},
+			// "1" entries were added after "2", and A/Bs are sorted in the reverse of digestNameSet order
+			{d: digestCompressedB, cn: compressorNameB, lr: "B1"},
+			{d: digestCompressedA, cn: compressorNameA, lr: "A1"},
+			{d: digestCompressedB, cn: compressorNameB, lr: "B2"},
+			// Beyond the replacementAttempts limit: {d: digestCompressedA, cn: compressorNameA, lr: "A2"},
+		}, cache.CandidateLocations2(transport, scope, digestUncompressed, true))
+
+		// Locations are known, but no relationships
+		assertCandidatesMatch2(t, scopeName, []candidate{
+			{d: digestCompressedUnrelated, cn: compressorNameCU, lr: "CU1"}, {d: digestCompressedUnrelated, cn: compressorNameCU, lr: "CU2"},
+		}, cache.CandidateLocations2(transport, scope, digestCompressedUnrelated, true))
 	}
 }
diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go
index 8f28c66238..dbfd271d67 100644
--- a/pkg/blobinfocache/memory/memory.go
+++ b/pkg/blobinfocache/memory/memory.go
@@ -5,6 +5,7 @@ import (
 	"sync"
 	"time"
 
+	"github.com/containers/image/v5/internal/blobinfocache"
 	"github.com/containers/image/v5/pkg/blobinfocache/internal/prioritize"
 	"github.com/containers/image/v5/types"
 	digest "github.com/opencontainers/go-digest"
@@ -18,13 +19,18 @@ type locationKey struct {
 	blobDigest digest.Digest
 }
 
+type knownLocationInfo struct {
+	when           time.Time
+	compressorName string
+}
+
 // cache implements an in-memory-only BlobInfoCache
 type cache struct {
 	mutex sync.Mutex
 	// The following fields can only be accessed with mutex held.
 	uncompressedDigests map[digest.Digest]digest.Digest
-	digestsByUncompressed map[digest.Digest]map[digest.Digest]struct{}              // stores a set of digests for each uncompressed digest
-	knownLocations        map[locationKey]map[types.BICLocationReference]time.Time // stores last known existence time for each location reference
+	digestsByUncompressed map[digest.Digest]map[digest.Digest]struct{}                      // stores a set of digests for each uncompressed digest
+	knownLocations        map[locationKey]map[types.BICLocationReference]knownLocationInfo // stores last known existence time and compression type for each location reference
 }
 
 // New returns a BlobInfoCache implementation which is in-memory only.
@@ -36,11 +42,11 @@ type cache struct {
 // Manual users of types.{ImageSource,ImageDestination} might also use
 // this instead of a persistent cache.
 func New() types.BlobInfoCache {
-	return &cache{
+	return blobinfocache.FromBlobInfoCache2(&cache{
 		uncompressedDigests:   map[digest.Digest]digest.Digest{},
 		digestsByUncompressed: map[digest.Digest]map[digest.Digest]struct{}{},
-		knownLocations:        map[locationKey]map[types.BICLocationReference]time.Time{},
-	}
+		knownLocations:        map[locationKey]map[types.BICLocationReference]knownLocationInfo{},
+	})
 }
 
 // UncompressedDigest returns an uncompressed digest corresponding to anyDigest.
@@ -87,18 +93,19 @@ func (mem *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre
 	anyDigestSet[anyDigest] = struct{}{} // Possibly writing the same struct{}{} presence marker again.
 }
 
-// RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope,
-// and can be reused given the opaque location data.
-func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, location types.BICLocationReference) {
+// RecordKnownLocation2 records that a blob with the specified digest, whether compressed with the specified algorithm, or
+// uncompressed if compressorName is "", exists within the specified (transport, scope) scope, and can be reused given
+// the opaque location data.
+func (mem *cache) RecordKnownLocation2(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, compressorName string, location types.BICLocationReference) {
 	mem.mutex.Lock()
 	defer mem.mutex.Unlock()
 	key := locationKey{transport: transport.Name(), scope: scope, blobDigest: blobDigest}
 	locationScope, ok := mem.knownLocations[key]
 	if !ok {
-		locationScope = map[types.BICLocationReference]time.Time{}
+		locationScope = map[types.BICLocationReference]knownLocationInfo{}
 		mem.knownLocations[key] = locationScope
 	}
-	locationScope[location] = time.Now() // Possibly overwriting an older entry.
+	locationScope[location] = knownLocationInfo{when: time.Now(), compressorName: compressorName} // Possibly overwriting an older entry.
 }
 
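For orientation, here is a minimal sketch of how the new recording and query calls fit together, modeled on the generic tests above. It can only live inside this repository (internal/blobinfocache is not importable from other modules), and the mocks import path, the compressor name "gzip", and the Opaque values are illustrative assumptions, not part of this PR:

package example

import (
	"testing"

	"github.com/containers/image/v5/internal/blobinfocache"
	"github.com/containers/image/v5/internal/testing/mocks" // assumed location of the NameImageTransport mock used in the tests above
	"github.com/containers/image/v5/pkg/blobinfocache/memory"
	"github.com/containers/image/v5/types"
	digest "github.com/opencontainers/go-digest"
)

func TestRecordAndQuery(t *testing.T) {
	// memory.New returns a types.BlobInfoCache; FromBlobInfoCache presents it
	// through the v2 interface (the memory cache implements v2 natively).
	cache := blobinfocache.FromBlobInfoCache(memory.New())
	transport := mocks.NameImageTransport("example transport")
	scope := types.BICTransportScope{Opaque: "registry.example.com"}
	d := digest.Digest("sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef")

	// Record a location together with the name of the compressor that produced the blob.
	cache.RecordKnownLocation2(transport, scope, d, "gzip", types.BICLocationReference{Opaque: "library/example"})

	// Candidates returned through the v2 query carry the recorded compressor name.
	for _, c := range cache.CandidateLocations2(transport, scope, d, false) {
		t.Logf("digest=%s compressor=%s location=%s", c.Digest, c.CompressorName, c.Location.Opaque)
	}
}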
 // appendReplacementCandidates creates prioritize.CandidateWithTime values for (transport, scope, digest), and returns the result of appending them to candidates.
@@ -106,23 +113,24 @@ func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
 	locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: scope, blobDigest: digest}] // nil if not present
 	for l, t := range locations {
 		candidates = append(candidates, prioritize.CandidateWithTime{
-			Candidate: types.BICReplacementCandidate{
-				Digest:   digest,
-				Location: l,
+			Candidate: blobinfocache.BICReplacementCandidate2{
+				Digest:         digest,
+				CompressorName: t.compressorName,
+				Location:       l,
 			},
-			LastSeen: t,
+			LastSeen: t.when,
 		})
 	}
 	return candidates
 }
 
-// CandidateLocations returns a prioritized, limited, number of blobs and their locations that could possibly be reused
+// CandidateLocations2 returns a prioritized, limited number of blobs and their locations that could possibly be reused
 // within the specified (transport, scope) (if they still exist, which is not guaranteed).
 //
 // If !canSubstitute, the returned candidates will match the submitted digest exactly; if canSubstitute,
 // data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same
 // uncompressed digest.
-func (mem *cache) CandidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []types.BICReplacementCandidate {
+func (mem *cache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 {
 	mem.mutex.Lock()
 	defer mem.mutex.Unlock()
 	res := []prioritize.CandidateWithTime{}
diff --git a/pkg/blobinfocache/memory/memory_test.go b/pkg/blobinfocache/memory/memory_test.go
index 63040f8908..8b833cf016 100644
--- a/pkg/blobinfocache/memory/memory_test.go
+++ b/pkg/blobinfocache/memory/memory_test.go
@@ -3,12 +3,14 @@ package memory
 import (
 	"testing"
 
+	"github.com/containers/image/v5/internal/blobinfocache"
 	"github.com/containers/image/v5/pkg/blobinfocache/internal/test"
-	"github.com/containers/image/v5/types"
 )
 
-func newTestCache(t *testing.T) (types.BlobInfoCache, func(t *testing.T)) {
-	return New(), func(t *testing.T) {}
+var _ blobinfocache.BlobInfoCache2WithoutV1 = &cache{}
+
+func newTestCache(t *testing.T) (blobinfocache.BlobInfoCache2, func(t *testing.T)) {
+	return blobinfocache.FromBlobInfoCache(New()), func(t *testing.T) {}
 }
 
 func TestNew(t *testing.T) {
diff --git a/pkg/blobinfocache/none/none.go b/pkg/blobinfocache/none/none.go
index fa1879afdb..5a3448a389 100644
--- a/pkg/blobinfocache/none/none.go
+++ b/pkg/blobinfocache/none/none.go
@@ -2,6 +2,7 @@ package none
 
 import (
+	"github.com/containers/image/v5/internal/blobinfocache"
 	"github.com/containers/image/v5/types"
 	"github.com/opencontainers/go-digest"
 )
@@ -16,7 +17,7 @@ type noCache struct {
 // Manifest.Inspect, because configs only have one representation.
 // Any use of BlobInfoCache with blobs should usually use at least a
 // short-lived cache, ideally blobinfocache.DefaultCache.
-var NoCache types.BlobInfoCache = noCache{}
+var NoCache blobinfocache.BlobInfoCache2 = blobinfocache.FromBlobInfoCache2(&noCache{})
 
 // UncompressedDigest returns an uncompressed digest corresponding to anyDigest.
 // May return anyDigest if it is known to be uncompressed.
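The memory_test.go change above shows the two adapters this PR leans on: FromBlobInfoCache2 wraps a v2-only implementation (anything satisfying BlobInfoCache2WithoutV1) so that it also serves the v1 types.BlobInfoCache methods, while FromBlobInfoCache goes the other way and presents any v1 cache through the v2 interface. A hedged sketch of the upgrade direction as a call site might use it; recordLocation2 is a hypothetical helper, and graceful degradation for plain v1 caches is an assumption about the adapter, not something this diff shows:

package example

import (
	"github.com/containers/image/v5/internal/blobinfocache"
	"github.com/containers/image/v5/types"
	digest "github.com/opencontainers/go-digest"
)

// recordLocation2 is a hypothetical helper: it accepts whatever cache the caller
// supplied (possibly a plain v1 implementation) and records through the v2 API,
// assuming FromBlobInfoCache falls back sensibly when the cache is not v2-capable.
func recordLocation2(cache types.BlobInfoCache, transport types.ImageTransport, scope types.BICTransportScope,
	d digest.Digest, compressorName string, location types.BICLocationReference) {
	bic := blobinfocache.FromBlobInfoCache(cache)
	bic.RecordKnownLocation2(transport, scope, d, compressorName, location)
}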
@@ -33,17 +34,17 @@ func (noCache) UncompressedDigest(anyDigest digest.Digest) digest.Digest {
 func (noCache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompressed digest.Digest) {
 }
 
-// RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope,
-// and can be reused given the opaque location data.
-func (noCache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, location types.BICLocationReference) {
+// RecordKnownLocation2 records that a blob with the specified digest exists within the specified (transport, scope) scope,
+// compressed with the specified compression algorithm (or uncompressed if compressorName is ""), and can be reused given the opaque location data.
+func (noCache) RecordKnownLocation2(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, compressorName string, location types.BICLocationReference) {
 }
 
-// CandidateLocations returns a prioritized, limited, number of blobs and their locations that could possibly be reused
+// CandidateLocations2 returns a prioritized, limited number of blobs and their locations that could possibly be reused
 // within the specified (transport, scope) (if they still exist, which is not guaranteed).
 //
 // If !canSubstitute, the returned candidates will match the submitted digest exactly; if canSubstitute,
 // data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same
 // uncompressed digest.
-func (noCache) CandidateLocations(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, canSubstitute bool) []types.BICReplacementCandidate {
+func (noCache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 {
 	return nil
 }
diff --git a/pkg/blobinfocache/none/none_test.go b/pkg/blobinfocache/none/none_test.go
new file mode 100644
index 0000000000..92b33c109b
--- /dev/null
+++ b/pkg/blobinfocache/none/none_test.go
@@ -0,0 +1,7 @@
+package none
+
+import (
+	"github.com/containers/image/v5/internal/blobinfocache"
+)
+
+var _ blobinfocache.BlobInfoCache2WithoutV1 = &noCache{}
diff --git a/types/types.go b/types/types.go
index 3c5126b4e5..a93f2e296c 100644
--- a/types/types.go
+++ b/types/types.go
@@ -194,6 +194,9 @@ type BICReplacementCandidate struct {
 //
 // None of the methods return an error indication: errors when reading from or writing to the cache should not be fatal;
 // users of the cache should just fall back to copying the blobs the usual way.
+//
+// The BlobInfoCache interface is deprecated. Consumers of this library should use one of the implementations provided by
+// subpackages of the library's "pkg/blobinfocache" package in preference to implementing the interface on their own.
 type BlobInfoCache interface {
 	// UncompressedDigest returns an uncompressed digest corresponding to anyDigest.
 	// May return anyDigest if it is known to be uncompressed.
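Finally, a short sketch of what the new deprecation note in types.go asks of consumers: obtain a cache from the library's pkg/blobinfocache subpackages instead of hand-implementing types.BlobInfoCache. blobinfocache.DefaultCache is the library's existing persistent default; the mode strings are illustrative, and returning none.NoCache as a types.BlobInfoCache assumes the v2 interface still embeds the v1 one, which the memory.New signature above suggests:

package main

import (
	"fmt"

	"github.com/containers/image/v5/pkg/blobinfocache"
	"github.com/containers/image/v5/pkg/blobinfocache/memory"
	"github.com/containers/image/v5/pkg/blobinfocache/none"
	"github.com/containers/image/v5/types"
)

// chooseCache picks one of the library-provided implementations.
func chooseCache(sys *types.SystemContext, mode string) types.BlobInfoCache {
	switch mode {
	case "memory":
		return memory.New() // in-memory only, lives for the process
	case "none":
		return none.NoCache // records nothing, returns nothing
	default:
		return blobinfocache.DefaultCache(sys) // the persistent default cache
	}
}

func main() {
	fmt.Printf("%T\n", chooseCache(nil, "memory"))
}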