Skip to content

Commit

Permalink
blobinfocache: track compression types for locations
Browse files Browse the repository at this point in the history
Extend the blob info cache to also cache the name of the type of
compression used on a blob that we've seen, or specific values that
indicate that we know the blob was not compressed, or that we don't
know whether or not it was compressed.

New methods for adding known blob-compression pairs and reading
candidate locations including compression information are part of a new
internal BlobInfoCache2 interface which the library's BlobInfoCache
implementors also implement.

When we copy a blob, try to record the state of compression for the
source blob, and if we applied any changes, the blob we produced.

Make sure that when TryReusingBlob successfully uses a blob from the
blob info cache, that it provides compression information in the
BlobInfo that it returns, so that manifests can be updated to describe
layers using the correct MIME types.

When attempting to write a manifest, if a manifest can't be written
because layers were compressed using an algorithm which can't be
expressed using that manifest type, continue on to trying other manifest
formats.

Signed-off-by: Nalin Dahyabhai <[email protected]>
  • Loading branch information
nalind committed Jan 15, 2021
1 parent 22b9e38 commit 5e6ae45
Show file tree
Hide file tree
Showing 27 changed files with 620 additions and 92 deletions.
47 changes: 39 additions & 8 deletions copy/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/image"
internalblobinfocache "github.com/containers/image/v5/internal/blobinfocache"
"github.com/containers/image/v5/internal/pkg/platform"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/pkg/blobinfocache"
Expand Down Expand Up @@ -113,7 +114,7 @@ type copier struct {
progressOutput io.Writer
progressInterval time.Duration
progress chan types.ProgressProperties
blobInfoCache types.BlobInfoCache
blobInfoCache internalblobinfocache.BlobInfoCache2
copyInParallel bool
compressionFormat compression.Algorithm
compressionLevel *int
Expand Down Expand Up @@ -265,7 +266,7 @@ func Image(ctx context.Context, policyContext *signature.PolicyContext, destRef,
// FIXME? The cache is used for sources and destinations equally, but we only have a SourceCtx and DestinationCtx.
// For now, use DestinationCtx (because blob reuse changes the behavior of the destination side more); eventually
// we might want to add a separate CommonCtx — or would that be too confusing?
blobInfoCache: blobinfocache.DefaultCache(options.DestinationCtx),
blobInfoCache: internalblobinfocache.FromBlobInfoCache(blobinfocache.DefaultCache(options.DestinationCtx)),
ociDecryptConfig: options.OciDecryptConfig,
ociEncryptConfig: options.OciEncryptConfig,
}
Expand Down Expand Up @@ -648,13 +649,19 @@ func (c *copier) copyOneImage(ctx context.Context, policyContext *signature.Poli
// With docker/distribution registries we do not know whether the registry accepts schema2 or schema1 only;
// and at least with the OpenShift registry "acceptschema2" option, there is no way to detect the support
// without actually trying to upload something and getting a types.ManifestTypeRejectedError.
// So, try the preferred manifest MIME type. If the process succeeds, fine…
// So, try the preferred manifest MIME type with possibly-updated blob digests, media types, and sizes if
// we're altering how they're compressed. If the process succeeds, fine…
manifestBytes, retManifestDigest, err := ic.copyUpdatedConfigAndManifest(ctx, targetInstance)
retManifestType = preferredManifestMIMEType
if err != nil {
logrus.Debugf("Writing manifest using preferred type %s failed: %v", preferredManifestMIMEType, err)
// … if it fails, _and_ the failure is because the manifest is rejected, we may have other options.
if _, isManifestRejected := errors.Cause(err).(types.ManifestTypeRejectedError); !isManifestRejected || len(otherManifestMIMETypeCandidates) == 0 {
// … if it fails, and the failure is either because the manifest is rejected by the registry, or
// because we failed to create a manifest of the specified type because the specific manifest type
// doesn't support the type of compression we're trying to use (e.g. docker v2s2 and zstd), we may
// have other options available that could still succeed.
_, isManifestRejected := errors.Cause(err).(types.ManifestTypeRejectedError)
_, isCompressionIncompatible := errors.Cause(err).(manifest.ManifestLayerCompressionIncompatibilityError)
if (!isManifestRejected && !isCompressionIncompatible) || len(otherManifestMIMETypeCandidates) == 0 {
// We don’t have other options.
// In principle the code below would handle this as well, but the resulting error message is fairly ugly.
// Don’t bother the user with MIME types if we have no choice.
Expand Down Expand Up @@ -896,7 +903,7 @@ func (ic *imageCopier) copyLayers(ctx context.Context) error {
return nil
}

// layerDigestsDiffer return true iff the digests in a and b differ (ignoring sizes and possible other fields)
// layerDigestsDiffer returns true iff the digests in a and b differ (ignoring sizes and possible other fields)
func layerDigestsDiffer(a, b []types.BlobInfo) bool {
if len(a) != len(b) {
return true
Expand Down Expand Up @@ -951,7 +958,7 @@ func (ic *imageCopier) copyUpdatedConfigAndManifest(ctx context.Context, instanc
instanceDigest = &manifestDigest
}
if err := ic.c.dest.PutManifest(ctx, man, instanceDigest); err != nil {
return nil, "", errors.Wrap(err, "Error writing manifest")
return nil, "", errors.Wrapf(err, "Error writing manifest %q", string(man))
}
return man, manifestDigest, nil
}
Expand Down Expand Up @@ -1058,6 +1065,12 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to

// If we already have the blob, and we don't need to compute the diffID, then we don't need to read it from the source.
if !diffIDIsNeeded {
// TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm
// that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing
// a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause
// a failure when we eventually try to update the manifest with the digest and MIME type of the reused blob.
// Fixing that will probably require passing more information to TryReusingBlob() than the current version of
// the ImageDestination interface lets us pass in.
reused, blobInfo, err := ic.c.dest.TryReusingBlob(ctx, srcInfo, ic.c.blobInfoCache, ic.canSubstituteBlobs)
if err != nil {
return types.BlobInfo{}, "", errors.Wrapf(err, "Error trying to reuse blob %s at destination", srcInfo.Digest)
Expand Down Expand Up @@ -1258,11 +1271,18 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
// === Deal with layer compression/decompression if necessary
var inputInfo types.BlobInfo
var compressionOperation types.LayerCompression
srcCompressorName := internalblobinfocache.Uncompressed
if isCompressed {
srcCompressorName = compressionFormat.Name()
}
var uploadCompressorName string
if canModifyBlob && isOciEncrypted(srcInfo.MediaType) {
// PreserveOriginal due to any compression not being able to be done on an encrypted blob unless decrypted
logrus.Debugf("Using original blob without modification for encrypted blob")
compressionOperation = types.PreserveOriginal
inputInfo = srcInfo
srcCompressorName = internalblobinfocache.UnknownCompression
uploadCompressorName = internalblobinfocache.UnknownCompression
} else if canModifyBlob && c.dest.DesiredLayerCompression() == types.Compress && !isCompressed {
logrus.Debugf("Compressing blob on the fly")
compressionOperation = types.Compress
Expand All @@ -1276,6 +1296,7 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
destStream = pipeReader
inputInfo.Digest = ""
inputInfo.Size = -1
uploadCompressorName = desiredCompressionFormat.Name()
} else if canModifyBlob && c.dest.DesiredLayerCompression() == types.Compress && isCompressed && desiredCompressionFormat.Name() != compressionFormat.Name() {
// When the blob is compressed, but the desired format is different, it first needs to be decompressed and finally
// re-compressed using the desired format.
Expand All @@ -1296,6 +1317,7 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
destStream = pipeReader
inputInfo.Digest = ""
inputInfo.Size = -1
uploadCompressorName = desiredCompressionFormat.Name()
} else if canModifyBlob && c.dest.DesiredLayerCompression() == types.Decompress && isCompressed {
logrus.Debugf("Blob will be decompressed")
compressionOperation = types.Decompress
Expand All @@ -1307,11 +1329,13 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
destStream = s
inputInfo.Digest = ""
inputInfo.Size = -1
uploadCompressorName = internalblobinfocache.Uncompressed
} else {
// PreserveOriginal might also need to recompress the original blob if the desired compression format is different.
logrus.Debugf("Using original blob without modification")
compressionOperation = types.PreserveOriginal
inputInfo = srcInfo
uploadCompressorName = srcCompressorName
}

// Perform image encryption for valid mediatypes if ociEncryptConfig provided
Expand Down Expand Up @@ -1390,7 +1414,7 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
}
}

// This is fairly horrible: the writer from getOriginalLayerCopyWriter wants to consumer
// This is fairly horrible: the writer from getOriginalLayerCopyWriter wants to consume
// all of the input (to compute DiffIDs), even if dest.PutBlob does not need it.
// So, read everything from originalLayerReader, which will cause the rest to be
// sent there if we are not already at EOF.
Expand All @@ -1408,6 +1432,7 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
if inputInfo.Digest != "" && uploadedInfo.Digest != inputInfo.Digest {
return types.BlobInfo{}, errors.Errorf("Internal error writing blob %s, blob with digest %s saved with digest %s", srcInfo.Digest, inputInfo.Digest, uploadedInfo.Digest)
}

if digestingReader.validationSucceeded {
// If compressionOperation != types.PreserveOriginal, we now have two reliable digest values:
// srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader
Expand All @@ -1423,6 +1448,12 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
default:
return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation)
}
if uploadCompressorName != "" && uploadCompressorName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, uploadCompressorName)
}
if srcInfo.Digest != "" && srcCompressorName != "" && srcCompressorName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, srcCompressorName)
}
}
return uploadedInfo, nil
}
Expand Down
5 changes: 3 additions & 2 deletions directory/directory_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ func (d *dirImageDestination) PutBlob(ctx context.Context, stream io.Reader, inp
// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
// info.Digest must not be empty.
// If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may
// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be
// reflected in the manifest that will be written.
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
// May use and/or update cache.
func (d *dirImageDestination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) {
Expand All @@ -210,7 +212,6 @@ func (d *dirImageDestination) TryReusingBlob(ctx context.Context, info types.Blo
return false, types.BlobInfo{}, err
}
return true, types.BlobInfo{Digest: info.Digest, Size: finfo.Size()}, nil

}

// PutManifest writes manifest to the destination.
Expand Down
29 changes: 23 additions & 6 deletions docker/docker_image_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"strings"

"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/internal/blobinfocache"
"github.com/containers/image/v5/internal/iolimits"
"github.com/containers/image/v5/internal/uploadreader"
"github.com/containers/image/v5/manifest"
Expand Down Expand Up @@ -284,7 +285,9 @@ func (d *dockerImageDestination) mountBlob(ctx context.Context, srcRepo referenc
// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
// info.Digest must not be empty.
// If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may
// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be
// reflected in the manifest that will be written.
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
// May use and/or update cache.
func (d *dockerImageDestination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) {
Expand All @@ -299,17 +302,23 @@ func (d *dockerImageDestination) TryReusingBlob(ctx context.Context, info types.
}
if exists {
cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, newBICLocationReference(d.ref))
return true, types.BlobInfo{Digest: info.Digest, Size: size}, nil
return true, types.BlobInfo{Digest: info.Digest, MediaType: info.MediaType, Size: size}, nil
}

// Then try reusing blobs from other locations.
for _, candidate := range cache.CandidateLocations(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, canSubstitute) {
bic := blobinfocache.FromBlobInfoCache(cache)
candidates := bic.CandidateLocations2(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, canSubstitute)
for _, candidate := range candidates {
candidateRepo, err := parseBICLocationReference(candidate.Location)
if err != nil {
logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err)
continue
}
logrus.Debugf("Trying to reuse cached location %s in %s", candidate.Digest.String(), candidateRepo.Name())
if candidate.CompressorName != blobinfocache.Uncompressed {
logrus.Debugf("Trying to reuse cached location %s compressed with %s in %s", candidate.Digest.String(), candidate.CompressorName, candidateRepo.Name())
} else {
logrus.Debugf("Trying to reuse cached location %s with no compression in %s", candidate.Digest.String(), candidateRepo.Name())
}

// Sanity checks:
if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) {
Expand Down Expand Up @@ -351,8 +360,16 @@ func (d *dockerImageDestination) TryReusingBlob(ctx context.Context, info types.
continue
}
}
cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), candidate.Digest, newBICLocationReference(d.ref))
return true, types.BlobInfo{Digest: candidate.Digest, Size: size}, nil

bic.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), candidate.Digest, newBICLocationReference(d.ref))

compressionOperation, compressionAlgorithm, err := blobinfocache.OperationAndAlgorithmForCompressor(candidate.CompressorName)
if err != nil {
logrus.Debugf("... Failed: %v", err)
continue
}

return true, types.BlobInfo{Digest: candidate.Digest, MediaType: info.MediaType, Size: size, CompressionOperation: compressionOperation, CompressionAlgorithm: compressionAlgorithm}, nil
}

return false, types.BlobInfo{}, nil
Expand Down
4 changes: 3 additions & 1 deletion docker/internal/tarfile/dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,9 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t
// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
// info.Digest must not be empty.
// If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may
// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be
// reflected in the manifest that will be written.
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
// May use and/or update cache.
func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) {
Expand Down
4 changes: 3 additions & 1 deletion docker/tarfile/dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t
// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
// info.Digest must not be empty.
// If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size.
// If the blob has been successfully reused, returns (true, info, nil); info must contain at least a digest and size, and may
// include CompressionOperation and CompressionAlgorithm fields to indicate that a change to the compression type should be
// reflected in the manifest that will be written.
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
// May use and/or update cache.
func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) {
Expand Down
3 changes: 3 additions & 0 deletions image/docker_schema2.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ func (m *manifestSchema2) UpdatedImageNeedsLayerDiffIDs(options types.ManifestUp

// UpdatedImage returns a types.Image modified according to options.
// This does not change the state of the original Image object.
// The returned error will be a manifest.ManifestLayerCompressionIncompatibilityError
// if the CompressionOperation and CompressionAlgorithm specified in one or more
// options.LayerInfos items is anything other than gzip.
func (m *manifestSchema2) UpdatedImage(ctx context.Context, options types.ManifestUpdateOptions) (types.Image, error) {
copy := manifestSchema2{ // NOTE: This is not a deep copy, it still shares slices etc.
src: m.src,
Expand Down
4 changes: 4 additions & 0 deletions image/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ func (m *manifestOCI1) UpdatedImageNeedsLayerDiffIDs(options types.ManifestUpdat

// UpdatedImage returns a types.Image modified according to options.
// This does not change the state of the original Image object.
// The returned error will be a manifest.ManifestLayerCompressionIncompatibilityError
// if the combination of CompressionOperation and CompressionAlgorithm specified
// in one or more options.LayerInfos items indicates that a layer is compressed using
// an algorithm that is not allowed in OCI.
func (m *manifestOCI1) UpdatedImage(ctx context.Context, options types.ManifestUpdateOptions) (types.Image, error) {
copy := manifestOCI1{ // NOTE: This is not a deep copy, it still shares slices etc.
src: m.src,
Expand Down
Loading

0 comments on commit 5e6ae45

Please sign in to comment.