From 8acbdbfe0dc8b62162c0f908bdbeb4fe7c283489 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Fri, 26 Jun 2020 13:14:58 +0200 Subject: [PATCH] support multi-image docker archives Add a `MultiImageArchive{Reader,Writer}` to `docker/archive` to support docker archives with more than one image. To allow the new archive reader/writer to be used for copying images, add an `Image{Destination,Source}` to `copy.Options`. When set, the destination/source referenced will be ignored and the specified `Image{Destination,Source}` will be used instead. Signed-off-by: Valentin Rothberg --- copy/copy.go | 23 +++++++--- docker/archive/dest.go | 5 +++ docker/archive/multi-reader.go | 77 ++++++++++++++++++++++++++++++++++ docker/archive/multi-writer.go | 51 ++++++++++++++++++++++ docker/archive/src.go | 4 ++ docker/archive/transport.go | 18 +++++--- docker/tarfile/dest.go | 75 +++++++++++++++++++++------------ docker/tarfile/src.go | 28 ++++++++----- 8 files changed, 232 insertions(+), 49 deletions(-) create mode 100644 docker/archive/multi-reader.go create mode 100644 docker/archive/multi-writer.go diff --git a/copy/copy.go b/copy/copy.go index 873bdc67f4..ef340f3bbb 100644 --- a/copy/copy.go +++ b/copy/copy.go @@ -170,6 +170,10 @@ type Options struct { ForceManifestMIMEType string ImageListSelection ImageListSelection // set to either CopySystemImage (the default), CopyAllImages, or CopySpecificImages to control which instances we copy when the source reference is a list; ignored if the source reference is not a list Instances []digest.Digest // if ImageListSelection is CopySpecificImages, copy only these instances and the list itself + // ImageDestination is a preset types.ImageDestination object. If not nil, the `destRef` will be ignored. + ImageDestination types.ImageDestination + // ImageSource is a preset types.ImageSource object. If not nil, the `srcRef` will be ignored. + ImageSource types.ImageSource // If OciEncryptConfig is non-nil, it indicates that an image should be encrypted. // The encryption options is derived from the construction of EncryptConfig object. // Note: During initial encryption process of a layer, the resultant digest is not known @@ -219,9 +223,13 @@ func Image(ctx context.Context, policyContext *signature.PolicyContext, destRef, reportWriter = options.ReportWriter } - dest, err := destRef.NewImageDestination(ctx, options.DestinationCtx) - if err != nil { - return nil, errors.Wrapf(err, "Error initializing destination %s", transports.ImageName(destRef)) + var err error + dest := options.ImageDestination + if dest == nil { + dest, err = destRef.NewImageDestination(ctx, options.DestinationCtx) + if err != nil { + return nil, errors.Wrapf(err, "Error initializing destination %s", transports.ImageName(destRef)) + } } defer func() { if err := dest.Close(); err != nil { @@ -229,9 +237,12 @@ func Image(ctx context.Context, policyContext *signature.PolicyContext, destRef, } }() - rawSource, err := srcRef.NewImageSource(ctx, options.SourceCtx) - if err != nil { - return nil, errors.Wrapf(err, "Error initializing source %s", transports.ImageName(srcRef)) + rawSource := options.ImageSource + if rawSource == nil { + rawSource, err = srcRef.NewImageSource(ctx, options.SourceCtx) + if err != nil { + return nil, errors.Wrapf(err, "Error initializing source %s", transports.ImageName(srcRef)) + } } defer func() { if err := rawSource.Close(); err != nil { diff --git a/docker/archive/dest.go b/docker/archive/dest.go index 1cf197429b..55dc2785e2 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -17,6 +17,10 @@ type archiveImageDestination struct { } func newImageDestination(sys *types.SystemContext, ref archiveReference) (types.ImageDestination, error) { + return newArchiveImageDestination(sys, ref) +} + +func newArchiveImageDestination(sys *types.SystemContext, ref archiveReference) (*archiveImageDestination, error) { // ref.path can be either a pipe or a regular file // in the case of a pipe, we require that we can open it for write // in the case of a regular file, we don't want to overwrite any pre-existing file @@ -27,6 +31,7 @@ func newImageDestination(sys *types.SystemContext, ref archiveReference) (types. return nil, errors.Wrapf(err, "error opening file %q", ref.path) } + // TODO: Docker supports writing to existing files. Shall we? fhStat, err := fh.Stat() if err != nil { return nil, errors.Wrapf(err, "error statting file %q", ref.path) diff --git a/docker/archive/multi-reader.go b/docker/archive/multi-reader.go new file mode 100644 index 0000000000..51916d05f3 --- /dev/null +++ b/docker/archive/multi-reader.go @@ -0,0 +1,77 @@ +package archive + +import ( + "context" + + "github.com/containers/image/v5/docker/tarfile" + "github.com/containers/image/v5/types" +) + +// MultiImageArchiveReader allows for reading a docker-archive with multiple +// images. It can be used in subsequent copy.Image operations when set as the +// copy.Options.ImageSource. Note that you need to call Next() before each +// copy.Image operation. +// +// For using a MultiImageArchiveReader in copy.Image, set it as the +// copy.Option.ImageSource. +type MultiImageArchiveReader struct { + *archiveImageSource + + manifests []tarfile.ManifestItem + manifestIndex int +} + +// NewMultiImageArchiveReader returns a new MultiImageArchiveReader based on the reference. +func NewMultiImageArchiveReader(ctx context.Context, sys *types.SystemContext, ref types.ImageReference) (*MultiImageArchiveReader, error) { + path, destinationRef, err := parsePathAndNamedRef(ref.StringWithinTransport()) + if err != nil { + return nil, err + } + archRef := archiveReference{ + path: path, + destinationRef: destinationRef, + } + + src, err := newArchiveImageSource(ctx, sys, archRef) + if err != nil { + return nil, err + } + + manifests, err := src.LoadTarManifest() + if err != nil { + return nil, err + } + + return &MultiImageArchiveReader{ + archiveImageSource: src, + manifests: manifests, + manifestIndex: -1, // So we can call Next() before each copy.Image call. + }, nil +} + +// Next selects the next image in the archive. False is returned when no image +// is left. +func (m *MultiImageArchiveReader) Next() bool { + m.manifestIndex++ + if m.manifestIndex >= len(m.manifests) { + return false + } + m.ChangeManifest(m.manifestIndex) + return true +} + +// Close is a NOP to allow for using the MultiImageArchiveReader as an +// types.ImageSource but without closing the underlying tarfile.Source. +func (m *MultiImageArchiveReader) Close() error { + return nil +} + +// Finalize closes the underlying tarfile. +func (m *MultiImageArchiveReader) Finalize() error { + return m.archiveImageSource.Close() +} + +// LoadTarManifest returns the tarfile.ManifestItem of the current image. +func (m *MultiImageArchiveReader) LoadTarManifest() ([]tarfile.ManifestItem, error) { + return []tarfile.ManifestItem{m.manifests[m.manifestIndex]}, nil +} diff --git a/docker/archive/multi-writer.go b/docker/archive/multi-writer.go new file mode 100644 index 0000000000..73941fef88 --- /dev/null +++ b/docker/archive/multi-writer.go @@ -0,0 +1,51 @@ +package archive + +import ( + "context" + + "github.com/containers/image/v5/types" + "github.com/pkg/errors" +) + +// MultiImageArchiveReader allows for creating and writing to a docker-archive +// with multiple images. Once created, it can be used in subsequent copy.Image +// operations when set as the copy.Options.ImageDestination. +// +// For using a MultiImageArchiveWriter in copy.Image, set it as the +// copy.Option.ImageDestination. +type MultiImageArchiveWriter struct { + *archiveImageDestination +} + +func NewMultiImageArchiveWriter(ctx context.Context, sys *types.SystemContext, path string) (*MultiImageArchiveWriter, error) { + ref := archiveReference{path: path} + dst, err := newArchiveImageDestination(sys, ref) + if err != nil { + return nil, err + } + return &MultiImageArchiveWriter{archiveImageDestination: dst}, nil +} + +// Close is a NOP. Use Finalize() instead. +func (m *MultiImageArchiveWriter) Close() error { + return nil +} + +// Commit is a NOP. Use Finalize() instead. +func (m *MultiImageArchiveWriter) Commit(_ context.Context, _ types.UnparsedImage) error { + return nil +} + +// Finalize commits pending data and closes the underlying tarfile. +func (m *MultiImageArchiveWriter) Finalize(ctx context.Context) (finalErr error) { + defer func() { + if err := m.writer.Close(); err != nil { + if finalErr == nil { + finalErr = err + } else { + finalErr = errors.Wrap(finalErr, err.Error()) + } + } + }() + return m.Destination.Commit(ctx) +} diff --git a/docker/archive/src.go b/docker/archive/src.go index 6a628508d3..ff2d0a678c 100644 --- a/docker/archive/src.go +++ b/docker/archive/src.go @@ -16,6 +16,10 @@ type archiveImageSource struct { // newImageSource returns a types.ImageSource for the specified image reference. // The caller must call .Close() on the returned ImageSource. func newImageSource(ctx context.Context, sys *types.SystemContext, ref archiveReference) (types.ImageSource, error) { + return newArchiveImageSource(ctx, sys, ref) +} + +func newArchiveImageSource(ctx context.Context, sys *types.SystemContext, ref archiveReference) (*archiveImageSource, error) { if ref.destinationRef != nil { logrus.Warnf("docker-archive: references are not supported for sources (ignoring)") } diff --git a/docker/archive/transport.go b/docker/archive/transport.go index 26bc687e00..7bfbf717d1 100644 --- a/docker/archive/transport.go +++ b/docker/archive/transport.go @@ -47,10 +47,10 @@ type archiveReference struct { destinationRef reference.NamedTagged } -// ParseReference converts a string, which should not start with the ImageTransport.Name prefix, into an Docker ImageReference. -func ParseReference(refString string) (types.ImageReference, error) { +// parsePathAndNamedRef parses `refString` and returns it's path and a reference.NamedTagged. +func parsePathAndNamedRef(refString string) (string, reference.NamedTagged, error) { if refString == "" { - return nil, errors.Errorf("docker-archive reference %s isn't of the form [:]", refString) + return "", nil, errors.Errorf("docker-archive reference %s isn't of the form [:]", refString) } parts := strings.SplitN(refString, ":", 2) @@ -61,17 +61,25 @@ func ParseReference(refString string) (types.ImageReference, error) { if len(parts) == 2 { ref, err := reference.ParseNormalizedNamed(parts[1]) if err != nil { - return nil, errors.Wrapf(err, "docker-archive parsing reference") + return "", nil, errors.Wrapf(err, "docker-archive parsing reference") } ref = reference.TagNameOnly(ref) refTagged, isTagged := ref.(reference.NamedTagged) if !isTagged { // Really shouldn't be hit... - return nil, errors.Errorf("internal error: reference is not tagged even after reference.TagNameOnly: %s", refString) + return "", nil, errors.Errorf("internal error: reference is not tagged even after reference.TagNameOnly: %s", refString) } destinationRef = refTagged } + return path, destinationRef, nil +} +// ParseReference converts a string, which should not start with the ImageTransport.Name prefix, into an Docker ImageReference. +func ParseReference(refString string) (types.ImageReference, error) { + path, destinationRef, err := parsePathAndNamedRef(refString) + if err != nil { + return nil, err + } return NewReference(path, destinationRef) } diff --git a/docker/tarfile/dest.go b/docker/tarfile/dest.go index c171da5059..553c520f6b 100644 --- a/docker/tarfile/dest.go +++ b/docker/tarfile/dest.go @@ -24,9 +24,11 @@ import ( // Destination is a partial implementation of types.ImageDestination for writing to an io.Writer. type Destination struct { - writer io.Writer - tar *tar.Writer - repoTags []reference.NamedTagged + writer io.Writer + tar *tar.Writer + repoTags []reference.NamedTagged + manifest []ManifestItem + repositories map[string]map[string]string // Other state. blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs config []byte @@ -46,11 +48,12 @@ func NewDestinationWithContext(sys *types.SystemContext, dest io.Writer, ref ref repoTags = append(repoTags, ref) } return &Destination{ - writer: dest, - tar: tar.NewWriter(dest), - repoTags: repoTags, - blobs: make(map[digest.Digest]types.BlobInfo), - sysCtx: sys, + writer: dest, + tar: tar.NewWriter(dest), + repoTags: repoTags, + blobs: make(map[digest.Digest]types.BlobInfo), + sysCtx: sys, + repositories: map[string]map[string]string{}, } } @@ -184,22 +187,14 @@ func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, c } func (d *Destination) createRepositoriesFile(rootLayerID string) error { - repositories := map[string]map[string]string{} for _, repoTag := range d.repoTags { - if val, ok := repositories[repoTag.Name()]; ok { + if val, ok := d.repositories[repoTag.Name()]; ok { val[repoTag.Tag()] = rootLayerID } else { - repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} + d.repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} } } - b, err := json.Marshal(repositories) - if err != nil { - return errors.Wrap(err, "Error marshaling repositories") - } - if err := d.sendBytes(legacyRepositoriesFileName, b); err != nil { - return errors.Wrap(err, "Error writing config json file") - } return nil } @@ -256,20 +251,18 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest repoTags = append(repoTags, refString) } - items := []ManifestItem{{ + d.manifest = append(d.manifest, ManifestItem{ Config: man.ConfigDescriptor.Digest.Hex() + ".json", RepoTags: repoTags, Layers: layerPaths, Parent: "", LayerSources: nil, - }} - itemsBytes, err := json.Marshal(&items) - if err != nil { - return err - } + }) - // FIXME? Do we also need to support the legacy format? - return d.sendBytes(manifestFileName, itemsBytes) + // Reset the repoTags to prevent them from leaking into a following + // image/manifest. + d.repoTags = []reference.NamedTagged{} + return nil } // writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers @@ -419,6 +412,34 @@ func (d *Destination) PutSignatures(ctx context.Context, signatures [][]byte, in // Commit finishes writing data to the underlying io.Writer. // It is the caller's responsibility to close it, if necessary. -func (d *Destination) Commit(ctx context.Context) error { +func (d *Destination) Commit(ctx context.Context) (finalErr error) { + defer func() { + if err := d.tar.Close(); err != nil { + if finalErr == nil { + finalErr = err + } else { + finalErr = errors.Wrap(finalErr, err.Error()) + } + } + }() + // Writing the manifest here instead of PutManifest allows for + // supporting multi-image archives. + itemsBytes, err := json.Marshal(d.manifest) + if err != nil { + return err + } + + // FIXME? Do we also need to support the legacy format? + if err := d.sendBytes(manifestFileName, itemsBytes); err != nil { + return err + } + + repoBytes, err := json.Marshal(d.repositories) + if err != nil { + return errors.Wrap(err, "Error marshaling repositories") + } + if err := d.sendBytes(legacyRepositoriesFileName, repoBytes); err != nil { + return errors.Wrap(err, "Error writing config json file") + } return d.tar.Close() } diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index 4d2368c70a..ab68d0947c 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -25,7 +25,9 @@ type Source struct { tarPath string removeTarPathOnClose bool // Remove temp file on close if true // The following data is only available after ensureCachedDataIsPresent() succeeds - tarManifest *ManifestItem // nil if not available yet. + tarManifest *ManifestItem // nil if not available yet. + tarManifestIndex int // index to select which manifest/image to select + configBytes []byte configDigest digest.Digest orderedDiffIDList []digest.Digest @@ -138,6 +140,15 @@ func (t *tarReadCloser) Close() error { return t.backingFile.Close() } +// ChangeManifest changes the current manifest in the Source to the specified +// index. An index of 2 will change to the third manifest in the Source. This +// allows for reading _all_ images in sequence. +func (s *Source) ChangeManifest(index int) { + s.tarManifestIndex = index + s.cacheDataLock = sync.Once{} + s.generatedManifest = nil +} + // openTarComponent returns a ReadCloser for the specific file within the archive. // This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers), // and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough. @@ -235,31 +246,26 @@ func (s *Source) ensureCachedDataIsPresentPrivate() error { return err } - // Check to make sure length is 1 - if len(tarManifest) != 1 { - return errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(tarManifest)) - } - // Read and parse config. - configBytes, err := s.readTarComponent(tarManifest[0].Config, iolimits.MaxConfigBodySize) + configBytes, err := s.readTarComponent(tarManifest[s.tarManifestIndex].Config, iolimits.MaxConfigBodySize) if err != nil { return err } var parsedConfig manifest.Schema2Image // There's a lot of info there, but we only really care about layer DiffIDs. if err := json.Unmarshal(configBytes, &parsedConfig); err != nil { - return errors.Wrapf(err, "Error decoding tar config %s", tarManifest[0].Config) + return errors.Wrapf(err, "Error decoding tar config %s", tarManifest[s.tarManifestIndex].Config) } if parsedConfig.RootFS == nil { - return errors.Errorf("Invalid image config (rootFS is not set): %s", tarManifest[0].Config) + return errors.Errorf("Invalid image config (rootFS is not set): %s", tarManifest[s.tarManifestIndex].Config) } - knownLayers, err := s.prepareLayerData(&tarManifest[0], &parsedConfig) + knownLayers, err := s.prepareLayerData(&tarManifest[s.tarManifestIndex], &parsedConfig) if err != nil { return err } // Success; commit. - s.tarManifest = &tarManifest[0] + s.tarManifest = &tarManifest[s.tarManifestIndex] s.configBytes = configBytes s.configDigest = digest.FromBytes(configBytes) s.orderedDiffIDList = parsedConfig.RootFS.DiffIDs