Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP - support multi-image docker archives #975

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 68 additions & 5 deletions docker/archive/dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"io"
"os"

"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/docker/tarfile"
"github.com/containers/image/v5/types"
"github.com/pkg/errors"
Expand All @@ -17,32 +18,36 @@ type archiveImageDestination struct {
}

func newImageDestination(sys *types.SystemContext, ref archiveReference) (types.ImageDestination, error) {
return newArchiveImageDestination(sys, ref.path, ref.destinationRef)
}

func newArchiveImageDestination(sys *types.SystemContext, path string, ref reference.NamedTagged) (*archiveImageDestination, error) {
// ref.path can be either a pipe or a regular file
// in the case of a pipe, we require that we can open it for write
// in the case of a regular file, we don't want to overwrite any pre-existing file
// so we check for Size() == 0 below (This is racy, but using O_EXCL would also be racy,
// only in a different way. Either way, it’s up to the user to not have two writers to the same path.)
fh, err := os.OpenFile(ref.path, os.O_WRONLY|os.O_CREATE, 0644)
fh, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return nil, errors.Wrapf(err, "error opening file %q", ref.path)
return nil, errors.Wrapf(err, "error opening file %q", path)
}

fhStat, err := fh.Stat()
if err != nil {
return nil, errors.Wrapf(err, "error statting file %q", ref.path)
return nil, errors.Wrapf(err, "error statting file %q", path)
}

if fhStat.Mode().IsRegular() && fhStat.Size() != 0 {
return nil, errors.New("docker-archive doesn't support modifying existing images")
}

tarDest := tarfile.NewDestinationWithContext(sys, fh, ref.destinationRef)
tarDest := tarfile.NewDestinationWithContext(sys, fh, ref)
if sys != nil && sys.DockerArchiveAdditionalTags != nil {
tarDest.AddRepoTags(sys.DockerArchiveAdditionalTags)
}
return &archiveImageDestination{
Destination: tarDest,
ref: ref,
ref: archiveReference{path, ref},
writer: fh,
}, nil
}
Expand Down Expand Up @@ -70,3 +75,61 @@ func (d *archiveImageDestination) Close() error {
func (d *archiveImageDestination) Commit(ctx context.Context, unparsedToplevel types.UnparsedImage) error {
return d.Destination.Commit(ctx)
}

func (m multiImageDestinationReference) NewImageDestination(_ context.Context, _ *types.SystemContext) (types.ImageDestination, error) {
return m.dest, nil
}

// MultiImageDestinations allows for creating and writing to docker archives
// that include more than one image.
type MultiImageDestination struct {
*archiveImageDestination
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn’t this externally expose PutBlob and everything else?

path string
}

// multiImageDestinationReference is a types.ImageReference embedding a MultiImageDestination.
type multiImageDestinationReference struct {
*archiveReference
dest *MultiImageDestination
}

// NewMultiImageDestination returns a MultiImageDestination for the specified path.
func NewMultiImageDestination(sys *types.SystemContext, path string) (*MultiImageDestination, error) {
dest, err := newArchiveImageDestination(sys, path, nil)
if err != nil {
return nil, err
}
return &MultiImageDestination{dest, path}, nil
}

// Reference returns an ImageReference embedding the MultiImageDestination.
func (m *MultiImageDestination) Reference() types.ImageReference {
ref := &archiveReference{path: m.path}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not include the tag

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but not on the reference = not in copy.Image error messages.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OTOH there is still the case of saving untagged images, so references don’t always have any extra data anyway.

I’ll read all of this more carefully a bit later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Much appreciated, thanks!

I am on PTO tomorrow but would will go back to this on Monday morning. Ideally, we need to get the feature in next week.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mtrmac, ideas how to proceed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mtrmac @rhatdan ... can we get this moving or are we blocked on something? I am getting increasing pressure to get this done.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we please proceed?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m sorry for not getting back, I did promise I would.

Still, #975 (review) was I think fairly clear to the general direction. See #991 for an unfinished prototype of the read side. Yes, the PR is larger, but it already includes the ability to read any (even untagged) image in an archive using a textual reference, and a lot of the “new” code is actually only moved — tarfile.Source was just split into two, the only non-trivial net new code at that layer is just chooseManifest.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I am still not sure how to proceed.

Now we have two PRs and I am worried we are running out of time; in fact, we might not get it into RHEL 8.3 any more.

If you agree, I want to focus on Podman-only needs first. We can still make follow-up cards for a more generally applicable solution, in case that will buy us some time. Certainly, the API shouldn't break.

return &multiImageDestinationReference{ref, m}
}

// Close is a NOP. Please use Finalize() for committing the archive and
// closing the underlying resources.
func (m *MultiImageDestination) Close() error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Close() is conceptually a bit different from Finalize() (or ImageDestination.Commit); it allows cleaning up the temporary files even on error.

Sure, this is a possible way to structure the API, but it’s a bit inconvenient to use: A typically caller will use something like defer multiDest.Close() and might not even check for errors if there is already a failure with a more important root cause to preserve, whereas on success the caller really wants to check that multiDest.Finalize() did succeed. Having “commit” and “deallocate” be the same operation forces every such caller to have a committed flag that’s checked inside the defer, or to have a critical part of the process in a defer.

return nil
}

// Commit is a NOP. Please use Finalize() for committing the archive and
// closing the underlying resources.
func (m *MultiImageDestination) Commit(_ context.Context, _ types.UnparsedImage) error {
return nil
}

// Finalize commits pending data and closes the underlying tarfile.
func (m *MultiImageDestination) Finalize(ctx context.Context) (finalErr error) {
defer func() {
if err := m.writer.Close(); err != nil {
if finalErr == nil {
finalErr = err
} else {
finalErr = errors.Wrap(finalErr, err.Error())
}
}
}()
return m.Destination.Commit(ctx)
}
81 changes: 81 additions & 0 deletions docker/archive/src.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"

"github.com/containers/image/v5/docker/tarfile"
ctrImage "github.com/containers/image/v5/image"
"github.com/containers/image/v5/types"
"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -34,3 +35,83 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref archiveRe
func (s *archiveImageSource) Reference() types.ImageReference {
return s.ref
}

// MultiImageSourceItem is a reference to _one_ image in a multi-image archive.
// Note that MultiImageSourceItem implements types.ImageReference. It's a
// long-lived object that can only be closed via it's parent MultiImageSource.
type MultiImageSourceItem struct {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conceptually, I’m concerned about having two ImageReference implementations with different behavior but the same string syntax. ref.Transport().ParseReference(ref.StringWithinTransport()) is documented to be “equivalent to” ref; that’s easiest to do when there is just one implementation, and not the case here (with a docker-archive:$path-formatted references that successfully access images in multi-image archives, but fail when used from the CLI).

*archiveReference
tarSource *tarfile.Source
}

// Manifest returns the tarfile.ManifestItem.
func (m *MultiImageSourceItem) Manifest() (*tarfile.ManifestItem, error) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing but RepoTags is really usable by callers, see e.g. the use of Config in containers/podman#6811 ; so I’m a bit reluctant to commit to this as an API. OTOH, we clearly can keep this stable.

items, err := m.tarSource.LoadTarManifest()
if err != nil {
return nil, err
}
return &items[0], nil
vrothberg marked this conversation as resolved.
Show resolved Hide resolved
}

// NewImage returns a types.ImageCloser for this reference, possibly
// specialized for this ImageTransport.
func (m MultiImageSourceItem) NewImage(ctx context.Context, sys *types.SystemContext) (types.ImageCloser, error) {
src, err := m.NewImageSource(ctx, sys)
if err != nil {
return nil, err
}
return ctrImage.FromSource(ctx, sys, src)
}

// NewImageSource returns a types.ImageSource for this reference.
func (m MultiImageSourceItem) NewImageSource(ctx context.Context, sys *types.SystemContext) (types.ImageSource, error) {
return &archiveImageSource{
Source: m.tarSource,
ref: *m.archiveReference,
}, nil
}

// MultiImageSource allows for reading docker archives that includes more
// than one image. Use Items() to extract
type MultiImageSource struct {
path string
tarSource *tarfile.Source
}

// NewMultiImageSource creates a MultiImageSource for the
// specified path pointing to a docker-archive.
func NewMultiImageSource(ctx context.Context, sys *types.SystemContext, path string) (*MultiImageSource, error) {
src, err := tarfile.NewSourceFromFileWithContext(sys, path)
if err != nil {
return nil, err
}
return &MultiImageSource{path: path, tarSource: src}, nil
}

// Close closes the underlying tarfile.
func (m *MultiImageSource) Close() error {
return m.tarSource.Close()
}

// Items returns a MultiImageSourceItem for all manifests/images in the archive.
// Each references embeds an ImageSource pointing to the corresponding image in
// the archive.
func (m *MultiImageSource) Items() ([]MultiImageSourceItem, error) {
items, err := m.tarSource.LoadTarManifest()
if err != nil {
return nil, err
}
references := []MultiImageSourceItem{}
for index := range items {
src, err := m.tarSource.FromManifest(index)
if err != nil {
return nil, err
}
newRef := MultiImageSourceItem{
&archiveReference{path: m.path},
src,
}
references = append(references, newRef)
}
return references, nil
}
85 changes: 51 additions & 34 deletions docker/tarfile/dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ import (

// Destination is a partial implementation of types.ImageDestination for writing to an io.Writer.
type Destination struct {
writer io.Writer
tar *tar.Writer
repoTags []reference.NamedTagged
writer io.Writer
tar *tar.Writer
repoTags []reference.NamedTagged
manifest []ManifestItem
repositories map[string]map[string]string
// Other state.
blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs
config []byte
Expand All @@ -46,11 +48,12 @@ func NewDestinationWithContext(sys *types.SystemContext, dest io.Writer, ref ref
repoTags = append(repoTags, ref)
}
return &Destination{
writer: dest,
tar: tar.NewWriter(dest),
repoTags: repoTags,
blobs: make(map[digest.Digest]types.BlobInfo),
sysCtx: sys,
writer: dest,
tar: tar.NewWriter(dest),
repoTags: repoTags,
blobs: make(map[digest.Digest]types.BlobInfo),
sysCtx: sys,
repositories: map[string]map[string]string{},
}
}

Expand Down Expand Up @@ -183,24 +186,14 @@ func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, c
return false, types.BlobInfo{}, nil
}

func (d *Destination) createRepositoriesFile(rootLayerID string) error {
repositories := map[string]map[string]string{}
func (d *Destination) addRootLayerToRepositories(rootLayerID string) {
for _, repoTag := range d.repoTags {
if val, ok := repositories[repoTag.Name()]; ok {
if val, ok := d.repositories[repoTag.Name()]; ok {
val[repoTag.Tag()] = rootLayerID
} else {
repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID}
d.repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID}
}
}

b, err := json.Marshal(repositories)
if err != nil {
return errors.Wrap(err, "Error marshaling repositories")
}
if err := d.sendBytes(legacyRepositoriesFileName, b); err != nil {
return errors.Wrap(err, "Error writing config json file")
}
return nil
}

// PutManifest writes manifest to the destination.
Expand Down Expand Up @@ -229,9 +222,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest
}

if len(man.LayersDescriptors) > 0 {
if err := d.createRepositoriesFile(lastLayerID); err != nil {
return err
}
d.addRootLayerToRepositories(lastLayerID)
}

repoTags := []string{}
Expand All @@ -256,20 +247,18 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest
repoTags = append(repoTags, refString)
}

items := []ManifestItem{{
d.manifest = append(d.manifest, ManifestItem{
Config: man.ConfigDescriptor.Digest.Hex() + ".json",
RepoTags: repoTags,
Layers: layerPaths,
Parent: "",
LayerSources: nil,
}}
itemsBytes, err := json.Marshal(&items)
if err != nil {
return err
}
})

// FIXME? Do we also need to support the legacy format?
return d.sendBytes(manifestFileName, itemsBytes)
// Reset the repoTags to prevent them from leaking into a following
// image/manifest.
d.repoTags = []reference.NamedTagged{}
return nil
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still stateful, then…

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

… it’s also not documented that the caller is supposed to use (only) AddRepoTags after each PutManifest AFAICS.

}

// writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers
Expand Down Expand Up @@ -419,6 +408,34 @@ func (d *Destination) PutSignatures(ctx context.Context, signatures [][]byte, in

// Commit finishes writing data to the underlying io.Writer.
// It is the caller's responsibility to close it, if necessary.
func (d *Destination) Commit(ctx context.Context) error {
return d.tar.Close()
func (d *Destination) Commit(ctx context.Context) (finalErr error) {
defer func() {
if err := d.tar.Close(); err != nil {
if finalErr == nil {
finalErr = err
} else {
finalErr = errors.Wrap(finalErr, err.Error())
}
}
}()
// Writing the manifest here instead of PutManifest allows for
// supporting multi-image archives.
itemsBytes, err := json.Marshal(d.manifest)
if err != nil {
return err
}

// FIXME? Do we also need to support the legacy format?
if err := d.sendBytes(manifestFileName, itemsBytes); err != nil {
return err
}

repoBytes, err := json.Marshal(d.repositories)
if err != nil {
return errors.Wrap(err, "Error marshaling repositories")
}
if err := d.sendBytes(legacyRepositoriesFileName, repoBytes); err != nil {
return errors.Wrap(err, "Error writing config json file")
}
return nil
}
Loading