From 80c2a00c499145de47dcda4d58693cf1a56621a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 4 Aug 2020 21:59:36 +0200 Subject: [PATCH 01/35] Remove an obsolete FIXME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do support the legacy format nowadays. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/tarfile/dest.go | 1 - go.sum | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/tarfile/dest.go b/docker/tarfile/dest.go index c171da5059..7baf73e022 100644 --- a/docker/tarfile/dest.go +++ b/docker/tarfile/dest.go @@ -268,7 +268,6 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest return err } - // FIXME? Do we also need to support the legacy format? return d.sendBytes(manifestFileName, itemsBytes) } diff --git a/go.sum b/go.sum index a41fa5bfa0..d464655f0e 100644 --- a/go.sum +++ b/go.sum @@ -107,6 +107,7 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= @@ -399,6 +400,7 @@ google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873 h1:nfPFGzJkUDX6uBm google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/grpc v1.19.0/go.mod 
h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.23.1 h1:q4XQuHFC6I28BKZpo6IYyb3mNO+l7lSOxRuYTCiDfXk= google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.24.0 h1:vb/1TCsVn3DcJlQ0Gs1yB1pKI6Do2/QNwxdKqmc/b0s= google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= From a372946b809261948faa17653d16ff22f3a9ac09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 6 Aug 2020 02:06:11 +0200 Subject: [PATCH 02/35] Fix a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/tarfile/dest.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/tarfile/dest.go b/docker/tarfile/dest.go index 7baf73e022..db0d8d45ef 100644 --- a/docker/tarfile/dest.go +++ b/docker/tarfile/dest.go @@ -315,7 +315,7 @@ func (d *Destination) writeLegacyLayerMetadata(layerDescriptors []manifest.Schem if lastLayerID != "" { layerConfig["parent"] = lastLayerID } - // The root layer configuration file is generated by using subpart of the image configuration + // The top layer configuration file is generated by using subpart of the image configuration if i == len(layerDescriptors)-1 { var config map[string]*json.RawMessage err := json.Unmarshal(d.config, &config) From b5d61587e75eef1fa42a8e69a2d10ac187d92e63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 19:49:05 +0200 Subject: [PATCH 03/35] Move docker/tarfile.Destination to docker/internal/tarfile.Destination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... so that we can add more functionality to it without making it public API. Keep the original docker/tarfile.Destination as a compatibility shim. 
ManifestItem is moved to the private package to avoid an import cycle, but remains visible. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 423 +++++++++++++++++++++++++++++++ docker/internal/tarfile/types.go | 28 ++ docker/tarfile/dest.go | 341 ++----------------------- docker/tarfile/types.go | 19 +- 4 files changed, 469 insertions(+), 342 deletions(-) create mode 100644 docker/internal/tarfile/dest.go create mode 100644 docker/internal/tarfile/types.go diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go new file mode 100644 index 0000000000..db0d8d45ef --- /dev/null +++ b/docker/internal/tarfile/dest.go @@ -0,0 +1,423 @@ +package tarfile + +import ( + "archive/tar" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "time" + + "github.com/containers/image/v5/docker/reference" + "github.com/containers/image/v5/internal/iolimits" + "github.com/containers/image/v5/internal/tmpdir" + "github.com/containers/image/v5/manifest" + "github.com/containers/image/v5/types" + "github.com/opencontainers/go-digest" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// Destination is a partial implementation of types.ImageDestination for writing to an io.Writer. +type Destination struct { + writer io.Writer + tar *tar.Writer + repoTags []reference.NamedTagged + // Other state. + blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs + config []byte + sysCtx *types.SystemContext +} + +// NewDestination returns a tarfile.Destination for the specified io.Writer. +// Deprecated: please use NewDestinationWithContext instead +func NewDestination(dest io.Writer, ref reference.NamedTagged) *Destination { + return NewDestinationWithContext(nil, dest, ref) +} + +// NewDestinationWithContext returns a tarfile.Destination for the specified io.Writer. 
+func NewDestinationWithContext(sys *types.SystemContext, dest io.Writer, ref reference.NamedTagged) *Destination { + repoTags := []reference.NamedTagged{} + if ref != nil { + repoTags = append(repoTags, ref) + } + return &Destination{ + writer: dest, + tar: tar.NewWriter(dest), + repoTags: repoTags, + blobs: make(map[digest.Digest]types.BlobInfo), + sysCtx: sys, + } +} + +// AddRepoTags adds the specified tags to the destination's repoTags. +func (d *Destination) AddRepoTags(tags []reference.NamedTagged) { + d.repoTags = append(d.repoTags, tags...) +} + +// SupportedManifestMIMETypes tells which manifest mime types the destination supports +// If an empty slice or nil it's returned, then any mime type can be tried to upload +func (d *Destination) SupportedManifestMIMETypes() []string { + return []string{ + manifest.DockerV2Schema2MediaType, // We rely on the types.Image.UpdatedImage schema conversion capabilities. + } +} + +// SupportsSignatures returns an error (to be displayed to the user) if the destination certainly can't store signatures. +// Note: It is still possible for PutSignatures to fail if SupportsSignatures returns nil. +func (d *Destination) SupportsSignatures(ctx context.Context) error { + return errors.Errorf("Storing signatures for docker tar files is not supported") +} + +// AcceptsForeignLayerURLs returns false iff foreign layers in manifest should be actually +// uploaded to the image destination, true otherwise. +func (d *Destination) AcceptsForeignLayerURLs() bool { + return false +} + +// MustMatchRuntimeOS returns true iff the destination can store only images targeted for the current runtime architecture and OS. False otherwise. +func (d *Destination) MustMatchRuntimeOS() bool { + return false +} + +// IgnoresEmbeddedDockerReference returns true iff the destination does not care about Image.EmbeddedDockerReferenceConflicts(), +// and would prefer to receive an unmodified manifest instead of one modified for the destination. 
+// Does not make a difference if Reference().DockerReference() is nil. +func (d *Destination) IgnoresEmbeddedDockerReference() bool { + return false // N/A, we only accept schema2 images where EmbeddedDockerReferenceConflicts() is always false. +} + +// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently. +func (d *Destination) HasThreadSafePutBlob() bool { + return false +} + +// PutBlob writes contents of stream and returns data representing the result (with all data filled in). +// inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it. +// inputInfo.Size is the expected length of stream, if known. +// May update cache. +// WARNING: The contents of stream are being verified on the fly. Until stream.Read() returns io.EOF, the contents of the data SHOULD NOT be available +// to any other readers for download using the supplied digest. +// If stream.Read() at any time, ESPECIALLY at end of input, returns an error, PutBlob MUST 1) fail, and 2) delete any data stored so far. +func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo types.BlobInfo, cache types.BlobInfoCache, isConfig bool) (types.BlobInfo, error) { + // Ouch, we need to stream the blob into a temporary file just to determine the size. + // When the layer is decompressed, we also have to generate the digest on uncompressed datas. + if inputInfo.Size == -1 || inputInfo.Digest.String() == "" { + logrus.Debugf("docker tarfile: input with unknown size, streaming to disk first ...") + streamCopy, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(d.sysCtx), "docker-tarfile-blob") + if err != nil { + return types.BlobInfo{}, err + } + defer os.Remove(streamCopy.Name()) + defer streamCopy.Close() + + digester := digest.Canonical.Digester() + tee := io.TeeReader(stream, digester.Hash()) + // TODO: This can take quite some time, and should ideally be cancellable using ctx.Done(). 
+ size, err := io.Copy(streamCopy, tee) + if err != nil { + return types.BlobInfo{}, err + } + _, err = streamCopy.Seek(0, io.SeekStart) + if err != nil { + return types.BlobInfo{}, err + } + inputInfo.Size = size // inputInfo is a struct, so we are only modifying our copy. + if inputInfo.Digest == "" { + inputInfo.Digest = digester.Digest() + } + stream = streamCopy + logrus.Debugf("... streaming done") + } + + // Maybe the blob has been already sent + ok, reusedInfo, err := d.TryReusingBlob(ctx, inputInfo, cache, false) + if err != nil { + return types.BlobInfo{}, err + } + if ok { + return reusedInfo, nil + } + + if isConfig { + buf, err := iolimits.ReadAtMost(stream, iolimits.MaxConfigBodySize) + if err != nil { + return types.BlobInfo{}, errors.Wrap(err, "Error reading Config file stream") + } + d.config = buf + if err := d.sendFile(inputInfo.Digest.Hex()+".json", inputInfo.Size, bytes.NewReader(buf)); err != nil { + return types.BlobInfo{}, errors.Wrap(err, "Error writing Config file") + } + } else { + // Note that this can't be e.g. filepath.Join(l.Digest.Hex(), legacyLayerFileName); due to the way + // writeLegacyLayerMetadata constructs layer IDs differently from inputinfo.Digest values (as described + // inside it), most of the layers would end up in subdirectories alone without any metadata; (docker load) + // tries to load every subdirectory as an image and fails if the config is missing. So, keep the layers + // in the root of the tarball. + if err := d.sendFile(inputInfo.Digest.Hex()+".tar", inputInfo.Size, stream); err != nil { + return types.BlobInfo{}, err + } + } + d.blobs[inputInfo.Digest] = types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size} + return types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size}, nil +} + +// TryReusingBlob checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination +// (e.g. 
if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree). +// info.Digest must not be empty. +// If canSubstitute, TryReusingBlob can use an equivalent equivalent of the desired blob; in that case the returned info may not match the input. +// If the blob has been succesfully reused, returns (true, info, nil); info must contain at least a digest and size. +// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. +// May use and/or update cache. +func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { + if info.Digest == "" { + return false, types.BlobInfo{}, errors.Errorf("Can not check for a blob with unknown digest") + } + if blob, ok := d.blobs[info.Digest]; ok { + return true, types.BlobInfo{Digest: info.Digest, Size: blob.Size}, nil + } + return false, types.BlobInfo{}, nil +} + +func (d *Destination) createRepositoriesFile(rootLayerID string) error { + repositories := map[string]map[string]string{} + for _, repoTag := range d.repoTags { + if val, ok := repositories[repoTag.Name()]; ok { + val[repoTag.Tag()] = rootLayerID + } else { + repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} + } + } + + b, err := json.Marshal(repositories) + if err != nil { + return errors.Wrap(err, "Error marshaling repositories") + } + if err := d.sendBytes(legacyRepositoriesFileName, b); err != nil { + return errors.Wrap(err, "Error writing config json file") + } + return nil +} + +// PutManifest writes manifest to the destination. +// The instanceDigest value is expected to always be nil, because this transport does not support manifest lists, so +// there can be no secondary manifests. +// FIXME? 
This should also receive a MIME type if known, to differentiate between schema versions. +// If the destination is in principle available, refuses this manifest type (e.g. it does not recognize the schema), +// but may accept a different manifest type, the returned error must be an ManifestTypeRejectedError. +func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest *digest.Digest) error { + if instanceDigest != nil { + return errors.New(`Manifest lists are not supported for docker tar files`) + } + // We do not bother with types.ManifestTypeRejectedError; our .SupportedManifestMIMETypes() above is already providing only one alternative, + // so the caller trying a different manifest kind would be pointless. + var man manifest.Schema2 + if err := json.Unmarshal(m, &man); err != nil { + return errors.Wrap(err, "Error parsing manifest") + } + if man.SchemaVersion != 2 || man.MediaType != manifest.DockerV2Schema2MediaType { + return errors.Errorf("Unsupported manifest type, need a Docker schema 2 manifest") + } + + layerPaths, lastLayerID, err := d.writeLegacyLayerMetadata(man.LayersDescriptors) + if err != nil { + return err + } + + if len(man.LayersDescriptors) > 0 { + if err := d.createRepositoriesFile(lastLayerID); err != nil { + return err + } + } + + repoTags := []string{} + for _, tag := range d.repoTags { + // For github.com/docker/docker consumers, this works just as well as + // refString := ref.String() + // because when reading the RepoTags strings, github.com/docker/docker/reference + // normalizes both of them to the same value. + // + // Doing it this way to include the normalized-out `docker.io[/library]` does make + // a difference for github.com/projectatomic/docker consumers, with the + // “Add --add-registry and --block-registry options to docker daemon” patch. + // These consumers treat reference strings which include a hostname and reference + // strings without a hostname differently. 
+ // + // Using the host name here is more explicit about the intent, and it has the same + // effect as (docker pull) in projectatomic/docker, which tags the result using + // a hostname-qualified reference. + // See https://github.com/containers/image/issues/72 for a more detailed + // analysis and explanation. + refString := fmt.Sprintf("%s:%s", tag.Name(), tag.Tag()) + repoTags = append(repoTags, refString) + } + + items := []ManifestItem{{ + Config: man.ConfigDescriptor.Digest.Hex() + ".json", + RepoTags: repoTags, + Layers: layerPaths, + Parent: "", + LayerSources: nil, + }} + itemsBytes, err := json.Marshal(&items) + if err != nil { + return err + } + + return d.sendBytes(manifestFileName, itemsBytes) +} + +// writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers +func (d *Destination) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor) (layerPaths []string, lastLayerID string, err error) { + var chainID digest.Digest + lastLayerID = "" + for i, l := range layerDescriptors { + // This chainID value matches the computation in docker/docker/layer.CreateChainID … + if chainID == "" { + chainID = l.Digest + } else { + chainID = digest.Canonical.FromString(chainID.String() + " " + l.Digest.String()) + } + // … but note that this image ID does not match docker/docker/image/v1.CreateID. At least recent + // versions allocate new IDs on load, as long as the IDs we use are unique / cannot loop. + // + // Overall, the goal of computing a digest dependent on the full history is to avoid reusing an image ID + // (and possibly creating a loop in the "parent" links) if a layer with the same DiffID appears two or more + // times in layersDescriptors. The ChainID values are sufficient for this, the v1.CreateID computation + // which also mixes in the full image configuration seems unnecessary, at least as long as we are storing + // only a single image per tarball, i.e. 
all DiffID prefixes are unique (can’t differ only with + // configuration). + layerID := chainID.Hex() + + physicalLayerPath := l.Digest.Hex() + ".tar" + // The layer itself has been stored into physicalLayerPath in PutManifest. + // So, use that path for layerPaths used in the non-legacy manifest + layerPaths = append(layerPaths, physicalLayerPath) + // ... and create a symlink for the legacy format; + if err := d.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { + return nil, "", errors.Wrap(err, "Error creating layer symbolic link") + } + + b := []byte("1.0") + if err := d.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { + return nil, "", errors.Wrap(err, "Error writing VERSION file") + } + + // The legacy format requires a config file per layer + layerConfig := make(map[string]interface{}) + layerConfig["id"] = layerID + + // The root layer doesn't have any parent + if lastLayerID != "" { + layerConfig["parent"] = lastLayerID + } + // The top layer configuration file is generated by using subpart of the image configuration + if i == len(layerDescriptors)-1 { + var config map[string]*json.RawMessage + err := json.Unmarshal(d.config, &config) + if err != nil { + return nil, "", errors.Wrap(err, "Error unmarshaling config") + } + for _, attr := range [7]string{"architecture", "config", "container", "container_config", "created", "docker_version", "os"} { + layerConfig[attr] = config[attr] + } + } + b, err := json.Marshal(layerConfig) + if err != nil { + return nil, "", errors.Wrap(err, "Error marshaling layer config") + } + if err := d.sendBytes(filepath.Join(layerID, legacyConfigFileName), b); err != nil { + return nil, "", errors.Wrap(err, "Error writing config json file") + } + + lastLayerID = layerID + } + return layerPaths, lastLayerID, nil +} + +type tarFI struct { + path string + size int64 + isSymlink bool +} + +func (t *tarFI) Name() string { + return t.path +} +func (t 
*tarFI) Size() int64 { + return t.size +} +func (t *tarFI) Mode() os.FileMode { + if t.isSymlink { + return os.ModeSymlink + } + return 0444 +} +func (t *tarFI) ModTime() time.Time { + return time.Unix(0, 0) +} +func (t *tarFI) IsDir() bool { + return false +} +func (t *tarFI) Sys() interface{} { + return nil +} + +// sendSymlink sends a symlink into the tar stream. +func (d *Destination) sendSymlink(path string, target string) error { + hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: 0, isSymlink: true}, target) + if err != nil { + return nil + } + logrus.Debugf("Sending as tar link %s -> %s", path, target) + return d.tar.WriteHeader(hdr) +} + +// sendBytes sends a path into the tar stream. +func (d *Destination) sendBytes(path string, b []byte) error { + return d.sendFile(path, int64(len(b)), bytes.NewReader(b)) +} + +// sendFile sends a file into the tar stream. +func (d *Destination) sendFile(path string, expectedSize int64, stream io.Reader) error { + hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: expectedSize}, "") + if err != nil { + return nil + } + logrus.Debugf("Sending as tar file %s", path) + if err := d.tar.WriteHeader(hdr); err != nil { + return err + } + // TODO: This can take quite some time, and should ideally be cancellable using a context.Context. + size, err := io.Copy(d.tar, stream) + if err != nil { + return err + } + if size != expectedSize { + return errors.Errorf("Size mismatch when copying %s, expected %d, got %d", path, expectedSize, size) + } + return nil +} + +// PutSignatures would add the given signatures to the docker tarfile (currently not supported). +// The instanceDigest value is expected to always be nil, because this transport does not support manifest lists, so +// there can be no secondary manifests. MUST be called after PutManifest (signatures reference manifest contents). 
+func (d *Destination) PutSignatures(ctx context.Context, signatures [][]byte, instanceDigest *digest.Digest) error { + if instanceDigest != nil { + return errors.Errorf(`Manifest lists are not supported for docker tar files`) + } + if len(signatures) != 0 { + return errors.Errorf("Storing signatures for docker tar files is not supported") + } + return nil +} + +// Commit finishes writing data to the underlying io.Writer. +// It is the caller's responsibility to close it, if necessary. +func (d *Destination) Commit(ctx context.Context) error { + return d.tar.Close() +} diff --git a/docker/internal/tarfile/types.go b/docker/internal/tarfile/types.go new file mode 100644 index 0000000000..6e6ccd2d80 --- /dev/null +++ b/docker/internal/tarfile/types.go @@ -0,0 +1,28 @@ +package tarfile + +import ( + "github.com/containers/image/v5/manifest" + "github.com/opencontainers/go-digest" +) + +// Various data structures. + +// Based on github.com/docker/docker/image/tarexport/tarexport.go +const ( + manifestFileName = "manifest.json" + legacyLayerFileName = "layer.tar" + legacyConfigFileName = "json" + legacyVersionFileName = "VERSION" + legacyRepositoriesFileName = "repositories" +) + +// ManifestItem is an element of the array stored in the top-level manifest.json file. +type ManifestItem struct { // NOTE: This is visible as docker/tarfile.ManifestItem, and a part of the stable API. 
+ Config string + RepoTags []string + Layers []string + Parent imageID `json:",omitempty"` + LayerSources map[digest.Digest]manifest.Schema2Descriptor `json:",omitempty"` +} + +type imageID string diff --git a/docker/tarfile/dest.go b/docker/tarfile/dest.go index db0d8d45ef..2c0f3a8a1a 100644 --- a/docker/tarfile/dest.go +++ b/docker/tarfile/dest.go @@ -1,36 +1,18 @@ package tarfile import ( - "archive/tar" - "bytes" "context" - "encoding/json" - "fmt" "io" - "io/ioutil" - "os" - "path/filepath" - "time" + internal "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/docker/reference" - "github.com/containers/image/v5/internal/iolimits" - "github.com/containers/image/v5/internal/tmpdir" - "github.com/containers/image/v5/manifest" "github.com/containers/image/v5/types" "github.com/opencontainers/go-digest" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" ) // Destination is a partial implementation of types.ImageDestination for writing to an io.Writer. type Destination struct { - writer io.Writer - tar *tar.Writer - repoTags []reference.NamedTagged - // Other state. - blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs - config []byte - sysCtx *types.SystemContext + internal *internal.Destination } // NewDestination returns a tarfile.Destination for the specified io.Writer. @@ -41,59 +23,47 @@ func NewDestination(dest io.Writer, ref reference.NamedTagged) *Destination { // NewDestinationWithContext returns a tarfile.Destination for the specified io.Writer. 
func NewDestinationWithContext(sys *types.SystemContext, dest io.Writer, ref reference.NamedTagged) *Destination { - repoTags := []reference.NamedTagged{} - if ref != nil { - repoTags = append(repoTags, ref) - } - return &Destination{ - writer: dest, - tar: tar.NewWriter(dest), - repoTags: repoTags, - blobs: make(map[digest.Digest]types.BlobInfo), - sysCtx: sys, - } + return &Destination{internal: internal.NewDestinationWithContext(sys, dest, ref)} } // AddRepoTags adds the specified tags to the destination's repoTags. func (d *Destination) AddRepoTags(tags []reference.NamedTagged) { - d.repoTags = append(d.repoTags, tags...) + d.internal.AddRepoTags(tags) } // SupportedManifestMIMETypes tells which manifest mime types the destination supports // If an empty slice or nil it's returned, then any mime type can be tried to upload func (d *Destination) SupportedManifestMIMETypes() []string { - return []string{ - manifest.DockerV2Schema2MediaType, // We rely on the types.Image.UpdatedImage schema conversion capabilities. - } + return d.internal.SupportedManifestMIMETypes() } // SupportsSignatures returns an error (to be displayed to the user) if the destination certainly can't store signatures. // Note: It is still possible for PutSignatures to fail if SupportsSignatures returns nil. func (d *Destination) SupportsSignatures(ctx context.Context) error { - return errors.Errorf("Storing signatures for docker tar files is not supported") + return d.internal.SupportsSignatures(ctx) } // AcceptsForeignLayerURLs returns false iff foreign layers in manifest should be actually // uploaded to the image destination, true otherwise. func (d *Destination) AcceptsForeignLayerURLs() bool { - return false + return d.internal.AcceptsForeignLayerURLs() } // MustMatchRuntimeOS returns true iff the destination can store only images targeted for the current runtime architecture and OS. False otherwise. 
func (d *Destination) MustMatchRuntimeOS() bool { - return false + return d.internal.MustMatchRuntimeOS() } // IgnoresEmbeddedDockerReference returns true iff the destination does not care about Image.EmbeddedDockerReferenceConflicts(), // and would prefer to receive an unmodified manifest instead of one modified for the destination. // Does not make a difference if Reference().DockerReference() is nil. func (d *Destination) IgnoresEmbeddedDockerReference() bool { - return false // N/A, we only accept schema2 images where EmbeddedDockerReferenceConflicts() is always false. + return d.internal.IgnoresEmbeddedDockerReference() } // HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently. func (d *Destination) HasThreadSafePutBlob() bool { - return false + return d.internal.HasThreadSafePutBlob() } // PutBlob writes contents of stream and returns data representing the result (with all data filled in). @@ -104,66 +74,7 @@ func (d *Destination) HasThreadSafePutBlob() bool { // to any other readers for download using the supplied digest. // If stream.Read() at any time, ESPECIALLY at end of input, returns an error, PutBlob MUST 1) fail, and 2) delete any data stored so far. func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo types.BlobInfo, cache types.BlobInfoCache, isConfig bool) (types.BlobInfo, error) { - // Ouch, we need to stream the blob into a temporary file just to determine the size. - // When the layer is decompressed, we also have to generate the digest on uncompressed datas. 
- if inputInfo.Size == -1 || inputInfo.Digest.String() == "" { - logrus.Debugf("docker tarfile: input with unknown size, streaming to disk first ...") - streamCopy, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(d.sysCtx), "docker-tarfile-blob") - if err != nil { - return types.BlobInfo{}, err - } - defer os.Remove(streamCopy.Name()) - defer streamCopy.Close() - - digester := digest.Canonical.Digester() - tee := io.TeeReader(stream, digester.Hash()) - // TODO: This can take quite some time, and should ideally be cancellable using ctx.Done(). - size, err := io.Copy(streamCopy, tee) - if err != nil { - return types.BlobInfo{}, err - } - _, err = streamCopy.Seek(0, io.SeekStart) - if err != nil { - return types.BlobInfo{}, err - } - inputInfo.Size = size // inputInfo is a struct, so we are only modifying our copy. - if inputInfo.Digest == "" { - inputInfo.Digest = digester.Digest() - } - stream = streamCopy - logrus.Debugf("... streaming done") - } - - // Maybe the blob has been already sent - ok, reusedInfo, err := d.TryReusingBlob(ctx, inputInfo, cache, false) - if err != nil { - return types.BlobInfo{}, err - } - if ok { - return reusedInfo, nil - } - - if isConfig { - buf, err := iolimits.ReadAtMost(stream, iolimits.MaxConfigBodySize) - if err != nil { - return types.BlobInfo{}, errors.Wrap(err, "Error reading Config file stream") - } - d.config = buf - if err := d.sendFile(inputInfo.Digest.Hex()+".json", inputInfo.Size, bytes.NewReader(buf)); err != nil { - return types.BlobInfo{}, errors.Wrap(err, "Error writing Config file") - } - } else { - // Note that this can't be e.g. filepath.Join(l.Digest.Hex(), legacyLayerFileName); due to the way - // writeLegacyLayerMetadata constructs layer IDs differently from inputinfo.Digest values (as described - // inside it), most of the layers would end up in subdirectories alone without any metadata; (docker load) - // tries to load every subdirectory as an image and fails if the config is missing. 
So, keep the layers - // in the root of the tarball. - if err := d.sendFile(inputInfo.Digest.Hex()+".tar", inputInfo.Size, stream); err != nil { - return types.BlobInfo{}, err - } - } - d.blobs[inputInfo.Digest] = types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size} - return types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size}, nil + return d.internal.PutBlob(ctx, stream, inputInfo, cache, isConfig) } // TryReusingBlob checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination @@ -174,33 +85,7 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { - if info.Digest == "" { - return false, types.BlobInfo{}, errors.Errorf("Can not check for a blob with unknown digest") - } - if blob, ok := d.blobs[info.Digest]; ok { - return true, types.BlobInfo{Digest: info.Digest, Size: blob.Size}, nil - } - return false, types.BlobInfo{}, nil -} - -func (d *Destination) createRepositoriesFile(rootLayerID string) error { - repositories := map[string]map[string]string{} - for _, repoTag := range d.repoTags { - if val, ok := repositories[repoTag.Name()]; ok { - val[repoTag.Tag()] = rootLayerID - } else { - repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} - } - } - - b, err := json.Marshal(repositories) - if err != nil { - return errors.Wrap(err, "Error marshaling repositories") - } - if err := d.sendBytes(legacyRepositoriesFileName, b); err != nil { - return errors.Wrap(err, "Error writing config json file") - } - return nil + return d.internal.TryReusingBlob(ctx, info, cache, canSubstitute) } // 
PutManifest writes manifest to the destination. @@ -210,214 +95,18 @@ func (d *Destination) createRepositoriesFile(rootLayerID string) error { // If the destination is in principle available, refuses this manifest type (e.g. it does not recognize the schema), // but may accept a different manifest type, the returned error must be an ManifestTypeRejectedError. func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest *digest.Digest) error { - if instanceDigest != nil { - return errors.New(`Manifest lists are not supported for docker tar files`) - } - // We do not bother with types.ManifestTypeRejectedError; our .SupportedManifestMIMETypes() above is already providing only one alternative, - // so the caller trying a different manifest kind would be pointless. - var man manifest.Schema2 - if err := json.Unmarshal(m, &man); err != nil { - return errors.Wrap(err, "Error parsing manifest") - } - if man.SchemaVersion != 2 || man.MediaType != manifest.DockerV2Schema2MediaType { - return errors.Errorf("Unsupported manifest type, need a Docker schema 2 manifest") - } - - layerPaths, lastLayerID, err := d.writeLegacyLayerMetadata(man.LayersDescriptors) - if err != nil { - return err - } - - if len(man.LayersDescriptors) > 0 { - if err := d.createRepositoriesFile(lastLayerID); err != nil { - return err - } - } - - repoTags := []string{} - for _, tag := range d.repoTags { - // For github.com/docker/docker consumers, this works just as well as - // refString := ref.String() - // because when reading the RepoTags strings, github.com/docker/docker/reference - // normalizes both of them to the same value. - // - // Doing it this way to include the normalized-out `docker.io[/library]` does make - // a difference for github.com/projectatomic/docker consumers, with the - // “Add --add-registry and --block-registry options to docker daemon” patch. 
- // These consumers treat reference strings which include a hostname and reference - // strings without a hostname differently. - // - // Using the host name here is more explicit about the intent, and it has the same - // effect as (docker pull) in projectatomic/docker, which tags the result using - // a hostname-qualified reference. - // See https://github.com/containers/image/issues/72 for a more detailed - // analysis and explanation. - refString := fmt.Sprintf("%s:%s", tag.Name(), tag.Tag()) - repoTags = append(repoTags, refString) - } - - items := []ManifestItem{{ - Config: man.ConfigDescriptor.Digest.Hex() + ".json", - RepoTags: repoTags, - Layers: layerPaths, - Parent: "", - LayerSources: nil, - }} - itemsBytes, err := json.Marshal(&items) - if err != nil { - return err - } - - return d.sendBytes(manifestFileName, itemsBytes) -} - -// writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers -func (d *Destination) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor) (layerPaths []string, lastLayerID string, err error) { - var chainID digest.Digest - lastLayerID = "" - for i, l := range layerDescriptors { - // This chainID value matches the computation in docker/docker/layer.CreateChainID … - if chainID == "" { - chainID = l.Digest - } else { - chainID = digest.Canonical.FromString(chainID.String() + " " + l.Digest.String()) - } - // … but note that this image ID does not match docker/docker/image/v1.CreateID. At least recent - // versions allocate new IDs on load, as long as the IDs we use are unique / cannot loop. - // - // Overall, the goal of computing a digest dependent on the full history is to avoid reusing an image ID - // (and possibly creating a loop in the "parent" links) if a layer with the same DiffID appears two or more - // times in layersDescriptors. 
The ChainID values are sufficient for this, the v1.CreateID computation - // which also mixes in the full image configuration seems unnecessary, at least as long as we are storing - // only a single image per tarball, i.e. all DiffID prefixes are unique (can’t differ only with - // configuration). - layerID := chainID.Hex() - - physicalLayerPath := l.Digest.Hex() + ".tar" - // The layer itself has been stored into physicalLayerPath in PutManifest. - // So, use that path for layerPaths used in the non-legacy manifest - layerPaths = append(layerPaths, physicalLayerPath) - // ... and create a symlink for the legacy format; - if err := d.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { - return nil, "", errors.Wrap(err, "Error creating layer symbolic link") - } - - b := []byte("1.0") - if err := d.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { - return nil, "", errors.Wrap(err, "Error writing VERSION file") - } - - // The legacy format requires a config file per layer - layerConfig := make(map[string]interface{}) - layerConfig["id"] = layerID - - // The root layer doesn't have any parent - if lastLayerID != "" { - layerConfig["parent"] = lastLayerID - } - // The top layer configuration file is generated by using subpart of the image configuration - if i == len(layerDescriptors)-1 { - var config map[string]*json.RawMessage - err := json.Unmarshal(d.config, &config) - if err != nil { - return nil, "", errors.Wrap(err, "Error unmarshaling config") - } - for _, attr := range [7]string{"architecture", "config", "container", "container_config", "created", "docker_version", "os"} { - layerConfig[attr] = config[attr] - } - } - b, err := json.Marshal(layerConfig) - if err != nil { - return nil, "", errors.Wrap(err, "Error marshaling layer config") - } - if err := d.sendBytes(filepath.Join(layerID, legacyConfigFileName), b); err != nil { - return nil, "", errors.Wrap(err, "Error writing 
config json file") - } - - lastLayerID = layerID - } - return layerPaths, lastLayerID, nil -} - -type tarFI struct { - path string - size int64 - isSymlink bool -} - -func (t *tarFI) Name() string { - return t.path -} -func (t *tarFI) Size() int64 { - return t.size -} -func (t *tarFI) Mode() os.FileMode { - if t.isSymlink { - return os.ModeSymlink - } - return 0444 -} -func (t *tarFI) ModTime() time.Time { - return time.Unix(0, 0) -} -func (t *tarFI) IsDir() bool { - return false -} -func (t *tarFI) Sys() interface{} { - return nil -} - -// sendSymlink sends a symlink into the tar stream. -func (d *Destination) sendSymlink(path string, target string) error { - hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: 0, isSymlink: true}, target) - if err != nil { - return nil - } - logrus.Debugf("Sending as tar link %s -> %s", path, target) - return d.tar.WriteHeader(hdr) -} - -// sendBytes sends a path into the tar stream. -func (d *Destination) sendBytes(path string, b []byte) error { - return d.sendFile(path, int64(len(b)), bytes.NewReader(b)) -} - -// sendFile sends a file into the tar stream. -func (d *Destination) sendFile(path string, expectedSize int64, stream io.Reader) error { - hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: expectedSize}, "") - if err != nil { - return nil - } - logrus.Debugf("Sending as tar file %s", path) - if err := d.tar.WriteHeader(hdr); err != nil { - return err - } - // TODO: This can take quite some time, and should ideally be cancellable using a context.Context. - size, err := io.Copy(d.tar, stream) - if err != nil { - return err - } - if size != expectedSize { - return errors.Errorf("Size mismatch when copying %s, expected %d, got %d", path, expectedSize, size) - } - return nil + return d.internal.PutManifest(ctx, m, instanceDigest) } // PutSignatures would add the given signatures to the docker tarfile (currently not supported). 
// The instanceDigest value is expected to always be nil, because this transport does not support manifest lists, so // there can be no secondary manifests. MUST be called after PutManifest (signatures reference manifest contents). func (d *Destination) PutSignatures(ctx context.Context, signatures [][]byte, instanceDigest *digest.Digest) error { - if instanceDigest != nil { - return errors.Errorf(`Manifest lists are not supported for docker tar files`) - } - if len(signatures) != 0 { - return errors.Errorf("Storing signatures for docker tar files is not supported") - } - return nil + return d.internal.PutSignatures(ctx, signatures, instanceDigest) } // Commit finishes writing data to the underlying io.Writer. // It is the caller's responsibility to close it, if necessary. func (d *Destination) Commit(ctx context.Context) error { - return d.tar.Close() + return d.internal.Commit(ctx) } diff --git a/docker/tarfile/types.go b/docker/tarfile/types.go index ac222528a0..358d304e5a 100644 --- a/docker/tarfile/types.go +++ b/docker/tarfile/types.go @@ -1,28 +1,15 @@ package tarfile import ( - "github.com/containers/image/v5/manifest" - "github.com/opencontainers/go-digest" + internal "github.com/containers/image/v5/docker/internal/tarfile" ) // Various data structures. // Based on github.com/docker/docker/image/tarexport/tarexport.go const ( - manifestFileName = "manifest.json" - legacyLayerFileName = "layer.tar" - legacyConfigFileName = "json" - legacyVersionFileName = "VERSION" - legacyRepositoriesFileName = "repositories" + manifestFileName = "manifest.json" ) // ManifestItem is an element of the array stored in the top-level manifest.json file. 
-type ManifestItem struct { - Config string - RepoTags []string - Layers []string - Parent imageID `json:",omitempty"` - LayerSources map[digest.Digest]manifest.Schema2Descriptor `json:",omitempty"` -} - -type imageID string +type ManifestItem = internal.ManifestItem // All public members from the internal package remain accessible. From e1e5452879e183d311ae58647a8f69d94a9dc8a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 19:51:27 +0200 Subject: [PATCH 04/35] Use the docker/internal/tarfile.Destination from docker/daemon and docker/archive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... instead of the forwarding shims. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/archive/dest.go | 2 +- docker/daemon/daemon_dest.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/archive/dest.go b/docker/archive/dest.go index 1cf197429b..e86569cffd 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -5,7 +5,7 @@ import ( "io" "os" - "github.com/containers/image/v5/docker/tarfile" + "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/types" "github.com/pkg/errors" ) diff --git a/docker/daemon/daemon_dest.go b/docker/daemon/daemon_dest.go index c6afd4bde0..32a60fb4e5 100644 --- a/docker/daemon/daemon_dest.go +++ b/docker/daemon/daemon_dest.go @@ -4,8 +4,8 @@ import ( "context" "io" + "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/docker/reference" - "github.com/containers/image/v5/docker/tarfile" "github.com/containers/image/v5/types" "github.com/docker/docker/client" "github.com/pkg/errors" From 5a6886cb235d6253e64de7f8ed0bb904c418cebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 20:47:37 +0200 Subject: [PATCH 05/35] Remove 
deprecated non-SystemContext functions from docker/internal.tarfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a private API now, so we can just drop it; rename the ...With[System]Context variants to use the shorter name, and update all callers. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/archive/dest.go | 2 +- docker/daemon/daemon_dest.go | 2 +- docker/internal/tarfile/dest.go | 8 +------- docker/tarfile/dest.go | 2 +- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/docker/archive/dest.go b/docker/archive/dest.go index e86569cffd..12469ed963 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -36,7 +36,7 @@ func newImageDestination(sys *types.SystemContext, ref archiveReference) (types. return nil, errors.New("docker-archive doesn't support modifying existing images") } - tarDest := tarfile.NewDestinationWithContext(sys, fh, ref.destinationRef) + tarDest := tarfile.NewDestination(sys, fh, ref.destinationRef) if sys != nil && sys.DockerArchiveAdditionalTags != nil { tarDest.AddRepoTags(sys.DockerArchiveAdditionalTags) } diff --git a/docker/daemon/daemon_dest.go b/docker/daemon/daemon_dest.go index 32a60fb4e5..1a7a1dc8d3 100644 --- a/docker/daemon/daemon_dest.go +++ b/docker/daemon/daemon_dest.go @@ -54,7 +54,7 @@ func newImageDestination(ctx context.Context, sys *types.SystemContext, ref daem return &daemonImageDestination{ ref: ref, mustMatchRuntimeOS: mustMatchRuntimeOS, - Destination: tarfile.NewDestinationWithContext(sys, writer, namedTaggedRef), + Destination: tarfile.NewDestination(sys, writer, namedTaggedRef), goroutineCancel: goroutineCancel, statusChannel: statusChannel, writer: writer, diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index db0d8d45ef..b13c60add6 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -34,13 +34,7 @@ type Destination struct { } // NewDestination returns a
tarfile.Destination for the specified io.Writer. -// Deprecated: please use NewDestinationWithContext instead -func NewDestination(dest io.Writer, ref reference.NamedTagged) *Destination { - return NewDestinationWithContext(nil, dest, ref) -} - -// NewDestinationWithContext returns a tarfile.Destination for the specified io.Writer. -func NewDestinationWithContext(sys *types.SystemContext, dest io.Writer, ref reference.NamedTagged) *Destination { +func NewDestination(sys *types.SystemContext, dest io.Writer, ref reference.NamedTagged) *Destination { repoTags := []reference.NamedTagged{} if ref != nil { repoTags = append(repoTags, ref) diff --git a/docker/tarfile/dest.go b/docker/tarfile/dest.go index 2c0f3a8a1a..cffb497f59 100644 --- a/docker/tarfile/dest.go +++ b/docker/tarfile/dest.go @@ -23,7 +23,7 @@ func NewDestination(dest io.Writer, ref reference.NamedTagged) *Destination { // NewDestinationWithContext returns a tarfile.Destination for the specified io.Writer. func NewDestinationWithContext(sys *types.SystemContext, dest io.Writer, ref reference.NamedTagged) *Destination { - return &Destination{internal: internal.NewDestinationWithContext(sys, dest, ref)} + return &Destination{internal: internal.NewDestination(sys, dest, ref)} } // AddRepoTags adds the specified tags to the destination's repoTags. From 9680473ca1e066608841ed14195ffb70e8751192 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 4 Aug 2020 22:57:10 +0200 Subject: [PATCH 06/35] Introduce Destination.configPath and Destination.physicalLayerPath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will be splitting uses of these paths across two objects, so make them a private API instead of a copy&pasted convention. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index b13c60add6..61f99b85a3 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -143,16 +143,11 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t return types.BlobInfo{}, errors.Wrap(err, "Error reading Config file stream") } d.config = buf - if err := d.sendFile(inputInfo.Digest.Hex()+".json", inputInfo.Size, bytes.NewReader(buf)); err != nil { + if err := d.sendFile(d.configPath(inputInfo.Digest), inputInfo.Size, bytes.NewReader(buf)); err != nil { return types.BlobInfo{}, errors.Wrap(err, "Error writing Config file") } } else { - // Note that this can't be e.g. filepath.Join(l.Digest.Hex(), legacyLayerFileName); due to the way - // writeLegacyLayerMetadata constructs layer IDs differently from inputinfo.Digest values (as described - // inside it), most of the layers would end up in subdirectories alone without any metadata; (docker load) - // tries to load every subdirectory as an image and fails if the config is missing. So, keep the layers - // in the root of the tarball. - if err := d.sendFile(inputInfo.Digest.Hex()+".tar", inputInfo.Size, stream); err != nil { + if err := d.sendFile(d.physicalLayerPath(inputInfo.Digest), inputInfo.Size, stream); err != nil { return types.BlobInfo{}, err } } @@ -251,7 +246,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest } items := []ManifestItem{{ - Config: man.ConfigDescriptor.Digest.Hex() + ".json", + Config: d.configPath(man.ConfigDescriptor.Digest), RepoTags: repoTags, Layers: layerPaths, Parent: "", @@ -287,7 +282,7 @@ func (d *Destination) writeLegacyLayerMetadata(layerDescriptors []manifest.Schem // configuration). 
layerID := chainID.Hex() - physicalLayerPath := l.Digest.Hex() + ".tar" + physicalLayerPath := d.physicalLayerPath(l.Digest) // The layer itself has been stored into physicalLayerPath in PutManifest. // So, use that path for layerPaths used in the non-legacy manifest layerPaths = append(layerPaths, physicalLayerPath) @@ -333,6 +328,26 @@ func (d *Destination) writeLegacyLayerMetadata(layerDescriptors []manifest.Schem return layerPaths, lastLayerID, nil } +// configPath returns a path we choose for storing a config with the specified digest. +// NOTE: This is an internal implementation detail, not a format property, and can change +// any time. +func (d *Destination) configPath(configDigest digest.Digest) string { + return configDigest.Hex() + ".json" +} + +// physicalLayerPath returns a path we choose for storing a layer with the specified digest +// (the actual path, i.e. a regular file, not a symlink that may be used in the legacy format). +// NOTE: This is an internal implementation detail, not a format property, and can change +// any time. +func (d *Destination) physicalLayerPath(layerDigest digest.Digest) string { + // Note that this can't be e.g. filepath.Join(l.Digest.Hex(), legacyLayerFileName); due to the way + // writeLegacyLayerMetadata constructs layer IDs differently from inputinfo.Digest values (as described + // inside it), most of the layers would end up in subdirectories alone without any metadata; (docker load) + // tries to load every subdirectory as an image and fails if the config is missing. So, keep the layers + // in the root of the tarball. + return layerDigest.Hex() + ".tar" +} + type tarFI struct { path string size int64 From a9938d157fee955c7b9930e540d5a36c8f4115bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 4 Aug 2020 21:53:13 +0200 Subject: [PATCH 07/35] Split docker/internal.tarfile.Writer from Destination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... 
to eventually allow creating multiple Destinations from a single Writer. NOTE: This only splits the implementation into two, mostly at function boundary; it does NOTHING to support writing multiple images; top-level metadata is written independently on each Destination.PutManifest . This will be fixed soon. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 209 ++------------------------ docker/internal/tarfile/writer.go | 234 ++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 197 deletions(-) create mode 100644 docker/internal/tarfile/writer.go diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index 61f99b85a3..ceaf0b2aab 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -1,7 +1,6 @@ package tarfile import ( - "archive/tar" "bytes" "context" "encoding/json" @@ -9,8 +8,6 @@ import ( "io" "io/ioutil" "os" - "path/filepath" - "time" "github.com/containers/image/v5/docker/reference" "github.com/containers/image/v5/internal/iolimits" @@ -24,11 +21,9 @@ import ( // Destination is a partial implementation of types.ImageDestination for writing to an io.Writer. type Destination struct { - writer io.Writer - tar *tar.Writer + archive *Writer repoTags []reference.NamedTagged // Other state. 
- blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs config []byte sysCtx *types.SystemContext } @@ -40,10 +35,8 @@ func NewDestination(sys *types.SystemContext, dest io.Writer, ref reference.Name repoTags = append(repoTags, ref) } return &Destination{ - writer: dest, - tar: tar.NewWriter(dest), + archive: NewWriter(dest), repoTags: repoTags, - blobs: make(map[digest.Digest]types.BlobInfo), sysCtx: sys, } } @@ -129,7 +122,7 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t } // Maybe the blob has been already sent - ok, reusedInfo, err := d.TryReusingBlob(ctx, inputInfo, cache, false) + ok, reusedInfo, err := d.archive.tryReusingBlob(inputInfo) if err != nil { return types.BlobInfo{}, err } @@ -143,15 +136,15 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t return types.BlobInfo{}, errors.Wrap(err, "Error reading Config file stream") } d.config = buf - if err := d.sendFile(d.configPath(inputInfo.Digest), inputInfo.Size, bytes.NewReader(buf)); err != nil { + if err := d.archive.sendFile(d.archive.configPath(inputInfo.Digest), inputInfo.Size, bytes.NewReader(buf)); err != nil { return types.BlobInfo{}, errors.Wrap(err, "Error writing Config file") } } else { - if err := d.sendFile(d.physicalLayerPath(inputInfo.Digest), inputInfo.Size, stream); err != nil { + if err := d.archive.sendFile(d.archive.physicalLayerPath(inputInfo.Digest), inputInfo.Size, stream); err != nil { return types.BlobInfo{}, err } } - d.blobs[inputInfo.Digest] = types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size} + d.archive.recordBlob(types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size}) return types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size}, nil } @@ -163,33 +156,7 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on 
an unexpected failure. // May use and/or update cache. func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { - if info.Digest == "" { - return false, types.BlobInfo{}, errors.Errorf("Can not check for a blob with unknown digest") - } - if blob, ok := d.blobs[info.Digest]; ok { - return true, types.BlobInfo{Digest: info.Digest, Size: blob.Size}, nil - } - return false, types.BlobInfo{}, nil -} - -func (d *Destination) createRepositoriesFile(rootLayerID string) error { - repositories := map[string]map[string]string{} - for _, repoTag := range d.repoTags { - if val, ok := repositories[repoTag.Name()]; ok { - val[repoTag.Tag()] = rootLayerID - } else { - repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} - } - } - - b, err := json.Marshal(repositories) - if err != nil { - return errors.Wrap(err, "Error marshaling repositories") - } - if err := d.sendBytes(legacyRepositoriesFileName, b); err != nil { - return errors.Wrap(err, "Error writing config json file") - } - return nil + return d.archive.tryReusingBlob(info) } // PutManifest writes manifest to the destination. 
@@ -212,13 +179,13 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest return errors.Errorf("Unsupported manifest type, need a Docker schema 2 manifest") } - layerPaths, lastLayerID, err := d.writeLegacyLayerMetadata(man.LayersDescriptors) + layerPaths, lastLayerID, err := d.archive.writeLegacyLayerMetadata(man.LayersDescriptors, d.config) if err != nil { return err } if len(man.LayersDescriptors) > 0 { - if err := d.createRepositoriesFile(lastLayerID); err != nil { + if err := d.archive.createRepositoriesFile(lastLayerID, d.repoTags); err != nil { return err } } @@ -246,7 +213,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest } items := []ManifestItem{{ - Config: d.configPath(man.ConfigDescriptor.Digest), + Config: d.archive.configPath(man.ConfigDescriptor.Digest), RepoTags: repoTags, Layers: layerPaths, Parent: "", @@ -257,159 +224,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest return err } - return d.sendBytes(manifestFileName, itemsBytes) -} - -// writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers -func (d *Destination) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor) (layerPaths []string, lastLayerID string, err error) { - var chainID digest.Digest - lastLayerID = "" - for i, l := range layerDescriptors { - // This chainID value matches the computation in docker/docker/layer.CreateChainID … - if chainID == "" { - chainID = l.Digest - } else { - chainID = digest.Canonical.FromString(chainID.String() + " " + l.Digest.String()) - } - // … but note that this image ID does not match docker/docker/image/v1.CreateID. At least recent - // versions allocate new IDs on load, as long as the IDs we use are unique / cannot loop. 
- // - // Overall, the goal of computing a digest dependent on the full history is to avoid reusing an image ID - // (and possibly creating a loop in the "parent" links) if a layer with the same DiffID appears two or more - // times in layersDescriptors. The ChainID values are sufficient for this, the v1.CreateID computation - // which also mixes in the full image configuration seems unnecessary, at least as long as we are storing - // only a single image per tarball, i.e. all DiffID prefixes are unique (can’t differ only with - // configuration). - layerID := chainID.Hex() - - physicalLayerPath := d.physicalLayerPath(l.Digest) - // The layer itself has been stored into physicalLayerPath in PutManifest. - // So, use that path for layerPaths used in the non-legacy manifest - layerPaths = append(layerPaths, physicalLayerPath) - // ... and create a symlink for the legacy format; - if err := d.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { - return nil, "", errors.Wrap(err, "Error creating layer symbolic link") - } - - b := []byte("1.0") - if err := d.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { - return nil, "", errors.Wrap(err, "Error writing VERSION file") - } - - // The legacy format requires a config file per layer - layerConfig := make(map[string]interface{}) - layerConfig["id"] = layerID - - // The root layer doesn't have any parent - if lastLayerID != "" { - layerConfig["parent"] = lastLayerID - } - // The top layer configuration file is generated by using subpart of the image configuration - if i == len(layerDescriptors)-1 { - var config map[string]*json.RawMessage - err := json.Unmarshal(d.config, &config) - if err != nil { - return nil, "", errors.Wrap(err, "Error unmarshaling config") - } - for _, attr := range [7]string{"architecture", "config", "container", "container_config", "created", "docker_version", "os"} { - layerConfig[attr] = config[attr] - } - } - b, err 
:= json.Marshal(layerConfig) - if err != nil { - return nil, "", errors.Wrap(err, "Error marshaling layer config") - } - if err := d.sendBytes(filepath.Join(layerID, legacyConfigFileName), b); err != nil { - return nil, "", errors.Wrap(err, "Error writing config json file") - } - - lastLayerID = layerID - } - return layerPaths, lastLayerID, nil -} - -// configPath returns a path we choose for storing a config with the specified digest. -// NOTE: This is an internal implementation detail, not a format property, and can change -// any time. -func (d *Destination) configPath(configDigest digest.Digest) string { - return configDigest.Hex() + ".json" -} - -// physicalLayerPath returns a path we choose for storing a layer with the specified digest -// (the actual path, i.e. a regular file, not a symlink that may be used in the legacy format). -// NOTE: This is an internal implementation detail, not a format property, and can change -// any time. -func (d *Destination) physicalLayerPath(layerDigest digest.Digest) string { - // Note that this can't be e.g. filepath.Join(l.Digest.Hex(), legacyLayerFileName); due to the way - // writeLegacyLayerMetadata constructs layer IDs differently from inputinfo.Digest values (as described - // inside it), most of the layers would end up in subdirectories alone without any metadata; (docker load) - // tries to load every subdirectory as an image and fails if the config is missing. So, keep the layers - // in the root of the tarball. 
- return layerDigest.Hex() + ".tar" -} - -type tarFI struct { - path string - size int64 - isSymlink bool -} - -func (t *tarFI) Name() string { - return t.path -} -func (t *tarFI) Size() int64 { - return t.size -} -func (t *tarFI) Mode() os.FileMode { - if t.isSymlink { - return os.ModeSymlink - } - return 0444 -} -func (t *tarFI) ModTime() time.Time { - return time.Unix(0, 0) -} -func (t *tarFI) IsDir() bool { - return false -} -func (t *tarFI) Sys() interface{} { - return nil -} - -// sendSymlink sends a symlink into the tar stream. -func (d *Destination) sendSymlink(path string, target string) error { - hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: 0, isSymlink: true}, target) - if err != nil { - return nil - } - logrus.Debugf("Sending as tar link %s -> %s", path, target) - return d.tar.WriteHeader(hdr) -} - -// sendBytes sends a path into the tar stream. -func (d *Destination) sendBytes(path string, b []byte) error { - return d.sendFile(path, int64(len(b)), bytes.NewReader(b)) -} - -// sendFile sends a file into the tar stream. -func (d *Destination) sendFile(path string, expectedSize int64, stream io.Reader) error { - hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: expectedSize}, "") - if err != nil { - return nil - } - logrus.Debugf("Sending as tar file %s", path) - if err := d.tar.WriteHeader(hdr); err != nil { - return err - } - // TODO: This can take quite some time, and should ideally be cancellable using a context.Context. - size, err := io.Copy(d.tar, stream) - if err != nil { - return err - } - if size != expectedSize { - return errors.Errorf("Size mismatch when copying %s, expected %d, got %d", path, expectedSize, size) - } - return nil + return d.archive.sendBytes(manifestFileName, itemsBytes) } // PutSignatures would add the given signatures to the docker tarfile (currently not supported). 
@@ -428,5 +243,5 @@ func (d *Destination) PutSignatures(ctx context.Context, signatures [][]byte, in // Commit finishes writing data to the underlying io.Writer. // It is the caller's responsibility to close it, if necessary. func (d *Destination) Commit(ctx context.Context) error { - return d.tar.Close() + return d.archive.Close() } diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go new file mode 100644 index 0000000000..cbf4b6f196 --- /dev/null +++ b/docker/internal/tarfile/writer.go @@ -0,0 +1,234 @@ +package tarfile + +import ( + "archive/tar" + "bytes" + "encoding/json" + "io" + "os" + "path/filepath" + "time" + + "github.com/containers/image/v5/docker/reference" + "github.com/containers/image/v5/manifest" + "github.com/containers/image/v5/types" + "github.com/opencontainers/go-digest" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// Writer allows creating a (docker save)-formatted tar archive containing one or more images. +type Writer struct { + writer io.Writer + tar *tar.Writer + // Other state. + blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs +} + +// NewWriter returns a Writer for the specified io.Writer. +// The caller must eventually call .Close() on the returned object to create a valid archive. +func NewWriter(dest io.Writer) *Writer { + return &Writer{ + writer: dest, + tar: tar.NewWriter(dest), + blobs: make(map[digest.Digest]types.BlobInfo), + } +} + +// tryReusingBlob checks whether the transport already contains, a blob, and if so, returns its metadata. +// info.Digest must not be empty. +// If the blob has been succesfully reused, returns (true, info, nil); info must contain at least a digest and size. +// If the transport can not reuse the requested blob, tryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. 
+func (w *Writer) tryReusingBlob(info types.BlobInfo) (bool, types.BlobInfo, error) { + if info.Digest == "" { + return false, types.BlobInfo{}, errors.Errorf("Can not check for a blob with unknown digest") + } + if blob, ok := w.blobs[info.Digest]; ok { + return true, types.BlobInfo{Digest: info.Digest, Size: blob.Size}, nil + } + return false, types.BlobInfo{}, nil +} + +// recordBlob records metadata of a recorded blob, which must contain at least a digest and size. +func (w *Writer) recordBlob(info types.BlobInfo) { + w.blobs[info.Digest] = info +} + +func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference.NamedTagged) error { + repositories := map[string]map[string]string{} + for _, repoTag := range repoTags { + if val, ok := repositories[repoTag.Name()]; ok { + val[repoTag.Tag()] = rootLayerID + } else { + repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} + } + } + + b, err := json.Marshal(repositories) + if err != nil { + return errors.Wrap(err, "Error marshaling repositories") + } + if err := w.sendBytes(legacyRepositoriesFileName, b); err != nil { + return errors.Wrap(err, "Error writing config json file") + } + return nil +} + +// writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers +func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte) (layerPaths []string, lastLayerID string, err error) { + var chainID digest.Digest + lastLayerID = "" + for i, l := range layerDescriptors { + // This chainID value matches the computation in docker/docker/layer.CreateChainID … + if chainID == "" { + chainID = l.Digest + } else { + chainID = digest.Canonical.FromString(chainID.String() + " " + l.Digest.String()) + } + // … but note that this image ID does not match docker/docker/image/v1.CreateID. At least recent + // versions allocate new IDs on load, as long as the IDs we use are unique / cannot loop. 
+ // + // Overall, the goal of computing a digest dependent on the full history is to avoid reusing an image ID + // (and possibly creating a loop in the "parent" links) if a layer with the same DiffID appears two or more + // times in layersDescriptors. The ChainID values are sufficient for this, the v1.CreateID computation + // which also mixes in the full image configuration seems unnecessary, at least as long as we are storing + // only a single image per tarball, i.e. all DiffID prefixes are unique (can’t differ only with + // configuration). + layerID := chainID.Hex() + + physicalLayerPath := w.physicalLayerPath(l.Digest) + // The layer itself has been stored into physicalLayerPath in PutManifest. + // So, use that path for layerPaths used in the non-legacy manifest + layerPaths = append(layerPaths, physicalLayerPath) + // ... and create a symlink for the legacy format; + if err := w.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { + return nil, "", errors.Wrap(err, "Error creating layer symbolic link") + } + + b := []byte("1.0") + if err := w.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { + return nil, "", errors.Wrap(err, "Error writing VERSION file") + } + + // The legacy format requires a config file per layer + layerConfig := make(map[string]interface{}) + layerConfig["id"] = layerID + + // The root layer doesn't have any parent + if lastLayerID != "" { + layerConfig["parent"] = lastLayerID + } + // The top layer configuration file is generated by using subpart of the image configuration + if i == len(layerDescriptors)-1 { + var config map[string]*json.RawMessage + err := json.Unmarshal(configBytes, &config) + if err != nil { + return nil, "", errors.Wrap(err, "Error unmarshaling config") + } + for _, attr := range [7]string{"architecture", "config", "container", "container_config", "created", "docker_version", "os"} { + layerConfig[attr] = config[attr] + } + } + b, 
err := json.Marshal(layerConfig) + if err != nil { + return nil, "", errors.Wrap(err, "Error marshaling layer config") + } + if err := w.sendBytes(filepath.Join(layerID, legacyConfigFileName), b); err != nil { + return nil, "", errors.Wrap(err, "Error writing config json file") + } + + lastLayerID = layerID + } + return layerPaths, lastLayerID, nil +} + +// Close writes all outstanding data about images to the archive, and finishes writing data +// to the underlying io.Writer. +// No more images can be added after this is called. +func (w *Writer) Close() error { + return w.tar.Close() +} + +// configPath returns a path we choose for storing a config with the specified digest. +// NOTE: This is an internal implementation detail, not a format property, and can change +// any time. +func (w *Writer) configPath(configDigest digest.Digest) string { + return configDigest.Hex() + ".json" +} + +// physicalLayerPath returns a path we choose for storing a layer with the specified digest +// (the actual path, i.e. a regular file, not a symlink that may be used in the legacy format). +// NOTE: This is an internal implementation detail, not a format property, and can change +// any time. +func (w *Writer) physicalLayerPath(layerDigest digest.Digest) string { + // Note that this can't be e.g. filepath.Join(l.Digest.Hex(), legacyLayerFileName); due to the way + // writeLegacyLayerMetadata constructs layer IDs differently from inputinfo.Digest values (as described + // inside it), most of the layers would end up in subdirectories alone without any metadata; (docker load) + // tries to load every subdirectory as an image and fails if the config is missing. So, keep the layers + // in the root of the tarball. 
+ return layerDigest.Hex() + ".tar" +} + +type tarFI struct { + path string + size int64 + isSymlink bool +} + +func (t *tarFI) Name() string { + return t.path +} +func (t *tarFI) Size() int64 { + return t.size +} +func (t *tarFI) Mode() os.FileMode { + if t.isSymlink { + return os.ModeSymlink + } + return 0444 +} +func (t *tarFI) ModTime() time.Time { + return time.Unix(0, 0) +} +func (t *tarFI) IsDir() bool { + return false +} +func (t *tarFI) Sys() interface{} { + return nil +} + +// sendSymlink sends a symlink into the tar stream. +func (w *Writer) sendSymlink(path string, target string) error { + hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: 0, isSymlink: true}, target) + if err != nil { + return nil + } + logrus.Debugf("Sending as tar link %s -> %s", path, target) + return w.tar.WriteHeader(hdr) +} + +// sendBytes sends a path into the tar stream. +func (w *Writer) sendBytes(path string, b []byte) error { + return w.sendFile(path, int64(len(b)), bytes.NewReader(b)) +} + +// sendFile sends a file into the tar stream. +func (w *Writer) sendFile(path string, expectedSize int64, stream io.Reader) error { + hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: expectedSize}, "") + if err != nil { + return nil + } + logrus.Debugf("Sending as tar file %s", path) + if err := w.tar.WriteHeader(hdr); err != nil { + return err + } + // TODO: This can take quite some time, and should ideally be cancellable using a context.Context. + size, err := io.Copy(w.tar, stream) + if err != nil { + return err + } + if size != expectedSize { + return errors.Errorf("Size mismatch when copying %s, expected %d, got %d", path, expectedSize, size) + } + return nil +} From 1214e24a9fbf4e61da5286dba3bdbea98bc66b58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 4 Aug 2020 21:55:29 +0200 Subject: [PATCH 08/35] Move createRepositoriesFile to a bit better place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... 
to follow the PutManifest flow. Should not change behavior, nothing about the code was changed. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/writer.go | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index cbf4b6f196..bd19f3c1ca 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -54,26 +54,6 @@ func (w *Writer) recordBlob(info types.BlobInfo) { w.blobs[info.Digest] = info } -func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference.NamedTagged) error { - repositories := map[string]map[string]string{} - for _, repoTag := range repoTags { - if val, ok := repositories[repoTag.Name()]; ok { - val[repoTag.Tag()] = rootLayerID - } else { - repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} - } - } - - b, err := json.Marshal(repositories) - if err != nil { - return errors.Wrap(err, "Error marshaling repositories") - } - if err := w.sendBytes(legacyRepositoriesFileName, b); err != nil { - return errors.Wrap(err, "Error writing config json file") - } - return nil -} - // writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte) (layerPaths []string, lastLayerID string, err error) { var chainID digest.Digest @@ -142,6 +122,26 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des return layerPaths, lastLayerID, nil } +func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference.NamedTagged) error { + repositories := map[string]map[string]string{} + for _, repoTag := range repoTags { + if val, ok := repositories[repoTag.Name()]; ok { + val[repoTag.Tag()] = rootLayerID + } else { + repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} + } + } + + b, err 
:= json.Marshal(repositories) + if err != nil { + return errors.Wrap(err, "Error marshaling repositories") + } + if err := w.sendBytes(legacyRepositoriesFileName, b); err != nil { + return errors.Wrap(err, "Error writing config json file") + } + return nil +} + // Close writes all outstanding data about images to the archive, and finishes writing data // to the underlying io.Writer. // No more images can be added after this is called. From f4d4799a1bf9b1e995c640a060df150ee447fe3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 4 Aug 2020 22:02:30 +0200 Subject: [PATCH 09/35] Split Writer.createManifest from Destination.PutManifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This still only supports one image, but we will fix that momentarily. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 37 +---------------------------- docker/internal/tarfile/writer.go | 39 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index ceaf0b2aab..47efc89efe 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/json" - "fmt" "io" "io/ioutil" "os" @@ -190,41 +189,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest } } - repoTags := []string{} - for _, tag := range d.repoTags { - // For github.com/docker/docker consumers, this works just as well as - // refString := ref.String() - // because when reading the RepoTags strings, github.com/docker/docker/reference - // normalizes both of them to the same value. 
- // - // Doing it this way to include the normalized-out `docker.io[/library]` does make - // a difference for github.com/projectatomic/docker consumers, with the - // “Add --add-registry and --block-registry options to docker daemon” patch. - // These consumers treat reference strings which include a hostname and reference - // strings without a hostname differently. - // - // Using the host name here is more explicit about the intent, and it has the same - // effect as (docker pull) in projectatomic/docker, which tags the result using - // a hostname-qualified reference. - // See https://github.com/containers/image/issues/72 for a more detailed - // analysis and explanation. - refString := fmt.Sprintf("%s:%s", tag.Name(), tag.Tag()) - repoTags = append(repoTags, refString) - } - - items := []ManifestItem{{ - Config: d.archive.configPath(man.ConfigDescriptor.Digest), - RepoTags: repoTags, - Layers: layerPaths, - Parent: "", - LayerSources: nil, - }} - itemsBytes, err := json.Marshal(&items) - if err != nil { - return err - } - - return d.archive.sendBytes(manifestFileName, itemsBytes) + return d.archive.createManifest(man.ConfigDescriptor.Digest, layerPaths, d.repoTags) } // PutSignatures would add the given signatures to the docker tarfile (currently not supported). 
diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index bd19f3c1ca..d4efa2d767 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -4,6 +4,7 @@ import ( "archive/tar" "bytes" "encoding/json" + "fmt" "io" "os" "path/filepath" @@ -142,6 +143,44 @@ func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference return nil } +func (w *Writer) createManifest(configDigest digest.Digest, layerPaths []string, repoTags []reference.NamedTagged) error { + refStrings := []string{} + for _, tag := range repoTags { + // For github.com/docker/docker consumers, this works just as well as + // refString := ref.String() + // because when reading the RepoTags strings, github.com/docker/docker/reference + // normalizes both of them to the same value. + // + // Doing it this way to include the normalized-out `docker.io[/library]` does make + // a difference for github.com/projectatomic/docker consumers, with the + // “Add --add-registry and --block-registry options to docker daemon” patch. + // These consumers treat reference strings which include a hostname and reference + // strings without a hostname differently. + // + // Using the host name here is more explicit about the intent, and it has the same + // effect as (docker pull) in projectatomic/docker, which tags the result using + // a hostname-qualified reference. + // See https://github.com/containers/image/issues/72 for a more detailed + // analysis and explanation. 
+ refString := fmt.Sprintf("%s:%s", tag.Name(), tag.Tag()) + refStrings = append(refStrings, refString) + } + + items := []ManifestItem{{ + Config: w.configPath(configDigest), + RepoTags: refStrings, + Layers: layerPaths, + Parent: "", + LayerSources: nil, + }} + itemsBytes, err := json.Marshal(&items) + if err != nil { + return err + } + + return w.sendBytes(manifestFileName, itemsBytes) +} + // Close writes all outstanding data about images to the archive, and finishes writing data // to the underlying io.Writer. // No more images can be added after this is called. From e5c7ed849b8415451c1b155e7a548f84688a51f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 6 Aug 2020 00:55:30 +0200 Subject: [PATCH 10/35] Reorganize docker/internal/tarfile.Writer.createManifest a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... only to make future commits easier to review. This introduces a bit of inefficiency (creating an on-stack ManifestItem only to copy it into a single-element array), but that will be gone momentarily. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/writer.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index d4efa2d767..2bd53e29d7 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -144,7 +144,14 @@ func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference } func (w *Writer) createManifest(configDigest digest.Digest, layerPaths []string, repoTags []reference.NamedTagged) error { - refStrings := []string{} + item := ManifestItem{ + Config: w.configPath(configDigest), + RepoTags: []string{}, + Layers: layerPaths, + Parent: "", + LayerSources: nil, + } + for _, tag := range repoTags { // For github.com/docker/docker consumers, this works just as well as // refString := ref.String() @@ -163,16 +170,10 @@ func (w *Writer) createManifest(configDigest digest.Digest, layerPaths []string, // See https://github.com/containers/image/issues/72 for a more detailed // analysis and explanation. refString := fmt.Sprintf("%s:%s", tag.Name(), tag.Tag()) - refStrings = append(refStrings, refString) + item.RepoTags = append(item.RepoTags, refString) } - items := []ManifestItem{{ - Config: w.configPath(configDigest), - RepoTags: refStrings, - Layers: layerPaths, - Parent: "", - LayerSources: nil, - }} + items := []ManifestItem{item} itemsBytes, err := json.Marshal(&items) if err != nil { return err From efab5068c9e71bf6f2671e50d2afee03a2b76bcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 6 Aug 2020 01:16:34 +0200 Subject: [PATCH 11/35] Move the computation of layerPaths in docker-archive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... from writeLegacyLayerMetadata to the non-legacy createManifest. 
Now that we have a dedicated function for computing the path consistently, introducing another reference to it does not hurt maintainability, and the small efficiency gain of computing the path only once is not really worth coupling the legacy/non-legacy code, especially when the legacy code is going to get more complex. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 4 ++-- docker/internal/tarfile/writer.go | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index 47efc89efe..85bfe58276 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -178,7 +178,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest return errors.Errorf("Unsupported manifest type, need a Docker schema 2 manifest") } - layerPaths, lastLayerID, err := d.archive.writeLegacyLayerMetadata(man.LayersDescriptors, d.config) + lastLayerID, err := d.archive.writeLegacyLayerMetadata(man.LayersDescriptors, d.config) if err != nil { return err } @@ -189,7 +189,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest } } - return d.archive.createManifest(man.ConfigDescriptor.Digest, layerPaths, d.repoTags) + return d.archive.createManifest(man.LayersDescriptors, man.ConfigDescriptor.Digest, d.repoTags) } // PutSignatures would add the given signatures to the docker tarfile (currently not supported). 
diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index 2bd53e29d7..d439d9c1a5 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -56,7 +56,7 @@ func (w *Writer) recordBlob(info types.BlobInfo) { } // writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers -func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte) (layerPaths []string, lastLayerID string, err error) { +func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte) (lastLayerID string, err error) { var chainID digest.Digest lastLayerID = "" for i, l := range layerDescriptors { @@ -78,17 +78,15 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des layerID := chainID.Hex() physicalLayerPath := w.physicalLayerPath(l.Digest) - // The layer itself has been stored into physicalLayerPath in PutManifest. - // So, use that path for layerPaths used in the non-legacy manifest - layerPaths = append(layerPaths, physicalLayerPath) - // ... and create a symlink for the legacy format; + // Create a symlink for the legacy format, where there is one subdirectory per layer ("image"). + // See also the comment in physicalLayerPath. 
if err := w.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { - return nil, "", errors.Wrap(err, "Error creating layer symbolic link") + return "", errors.Wrap(err, "Error creating layer symbolic link") } b := []byte("1.0") if err := w.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { - return nil, "", errors.Wrap(err, "Error writing VERSION file") + return "", errors.Wrap(err, "Error writing VERSION file") } // The legacy format requires a config file per layer @@ -104,7 +102,7 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des var config map[string]*json.RawMessage err := json.Unmarshal(configBytes, &config) if err != nil { - return nil, "", errors.Wrap(err, "Error unmarshaling config") + return "", errors.Wrap(err, "Error unmarshaling config") } for _, attr := range [7]string{"architecture", "config", "container", "container_config", "created", "docker_version", "os"} { layerConfig[attr] = config[attr] @@ -112,15 +110,15 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des } b, err := json.Marshal(layerConfig) if err != nil { - return nil, "", errors.Wrap(err, "Error marshaling layer config") + return "", errors.Wrap(err, "Error marshaling layer config") } if err := w.sendBytes(filepath.Join(layerID, legacyConfigFileName), b); err != nil { - return nil, "", errors.Wrap(err, "Error writing config json file") + return "", errors.Wrap(err, "Error writing config json file") } lastLayerID = layerID } - return layerPaths, lastLayerID, nil + return lastLayerID, nil } func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference.NamedTagged) error { @@ -143,7 +141,12 @@ func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference return nil } -func (w *Writer) createManifest(configDigest digest.Digest, layerPaths []string, repoTags []reference.NamedTagged) error { +func (w *Writer) 
createManifest(layerDescriptors []manifest.Schema2Descriptor, configDigest digest.Digest, repoTags []reference.NamedTagged) error { + layerPaths := []string{} + for _, l := range layerDescriptors { + layerPaths = append(layerPaths, w.physicalLayerPath(l.Digest)) + } + item := ManifestItem{ Config: w.configPath(configDigest), RepoTags: []string{}, From bfe71f6aec9250ac9de192db6def64a2470cbe77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 6 Aug 2020 00:42:38 +0200 Subject: [PATCH 12/35] Implement writing multiple images in the modern format. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NOTE: This is not sufficient to create correct multi-image archives yet, the legacy format is still invalid. This will eventually allow creating multiple Destinations from a single Writer. Should not change behavior for current callers, except that possible JSON failures are now reported later. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 2 +- docker/internal/tarfile/writer.go | 73 +++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index 85bfe58276..9a5418703d 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -189,7 +189,7 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest } } - return d.archive.createManifest(man.LayersDescriptors, man.ConfigDescriptor.Digest, d.repoTags) + return d.archive.ensureManifestItem(man.LayersDescriptors, man.ConfigDescriptor.Digest, d.repoTags) } // PutSignatures would add the given signatures to the docker tarfile (currently not supported). 
diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index d439d9c1a5..55088f49e1 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -23,16 +23,19 @@ type Writer struct { writer io.Writer tar *tar.Writer // Other state. - blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs + blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs + manifest []ManifestItem + manifestByConfig map[digest.Digest]int // A map from config digest to an entry index in manifest above. } // NewWriter returns a Writer for the specified io.Writer. // The caller must eventually call .Close() on the returned object to create a valid archive. func NewWriter(dest io.Writer) *Writer { return &Writer{ - writer: dest, - tar: tar.NewWriter(dest), - blobs: make(map[digest.Digest]types.BlobInfo), + writer: dest, + tar: tar.NewWriter(dest), + blobs: make(map[digest.Digest]types.BlobInfo), + manifestByConfig: map[digest.Digest]int{}, } } @@ -141,20 +144,56 @@ func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference return nil } -func (w *Writer) createManifest(layerDescriptors []manifest.Schema2Descriptor, configDigest digest.Digest, repoTags []reference.NamedTagged) error { +// checkManifestItemsMatch checks that a and b describe the same image, +// and returns an error if that’s not the case (which should never happen). +func checkManifestItemsMatch(a, b *ManifestItem) error { + if a.Config != b.Config { + return fmt.Errorf("Internal error: Trying to reuse ManifestItem values with configs %#v vs. %#v", a.Config, b.Config) + } + if len(a.Layers) != len(b.Layers) { + return fmt.Errorf("Internal error: Trying to reuse ManifestItem values with layers %#v vs. %#v", a.Layers, b.Layers) + } + for i := range a.Layers { + if a.Layers[i] != b.Layers[i] { + return fmt.Errorf("Internal error: Trying to reuse ManifestItem values with layers[i] %#v vs. 
%#v", a.Layers[i], b.Layers[i]) + } + } + // Ignore RepoTags, that will be built later. + // Ignore Parent and LayerSources, which we don’t set to anything meaningful. + return nil +} + +// ensureManifestItem ensures that there is a manifest item pointing to (layerDescriptors, configDigest) with repoTags +func (w *Writer) ensureManifestItem(layerDescriptors []manifest.Schema2Descriptor, configDigest digest.Digest, repoTags []reference.NamedTagged) error { layerPaths := []string{} for _, l := range layerDescriptors { layerPaths = append(layerPaths, w.physicalLayerPath(l.Digest)) } - item := ManifestItem{ + var item *ManifestItem + newItem := ManifestItem{ Config: w.configPath(configDigest), RepoTags: []string{}, Layers: layerPaths, - Parent: "", + Parent: "", // We don’t have this information LayerSources: nil, } + if i, ok := w.manifestByConfig[configDigest]; ok { + item = &w.manifest[i] + if err := checkManifestItemsMatch(item, &newItem); err != nil { + return err + } + } else { + i := len(w.manifest) + w.manifestByConfig[configDigest] = i + w.manifest = append(w.manifest, newItem) + item = &w.manifest[i] + } + knownRepoTags := map[string]struct{}{} + for _, repoTag := range item.RepoTags { + knownRepoTags[repoTag] = struct{}{} + } for _, tag := range repoTags { // For github.com/docker/docker consumers, this works just as well as // refString := ref.String() @@ -173,22 +212,28 @@ func (w *Writer) createManifest(layerDescriptors []manifest.Schema2Descriptor, c // See https://github.com/containers/image/issues/72 for a more detailed // analysis and explanation. 
refString := fmt.Sprintf("%s:%s", tag.Name(), tag.Tag()) - item.RepoTags = append(item.RepoTags, refString) - } - items := []ManifestItem{item} - itemsBytes, err := json.Marshal(&items) - if err != nil { - return err + if _, ok := knownRepoTags[refString]; !ok { + item.RepoTags = append(item.RepoTags, refString) + knownRepoTags[refString] = struct{}{} + } } - return w.sendBytes(manifestFileName, itemsBytes) + return nil } // Close writes all outstanding data about images to the archive, and finishes writing data // to the underlying io.Writer. // No more images can be added after this is called. func (w *Writer) Close() error { + b, err := json.Marshal(&w.manifest) + if err != nil { + return err + } + if err := w.sendBytes(manifestFileName, b); err != nil { + return err + } + return w.tar.Close() } From 4f13a6b5a33a65a32b2825529c1d40c6863dfc8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 6 Aug 2020 01:58:21 +0200 Subject: [PATCH 13/35] Split createSingleLegacyLayer from writeLegacyLayerMetadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... so that we can later only do this for layers that haven't been created yet. Should not change behavior, apart from timing of some reported errors. 
Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/writer.go | 39 +++++++++++++++++++------------ 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index 55088f49e1..ae36ac45ca 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -58,6 +58,26 @@ func (w *Writer) recordBlob(info types.BlobInfo) { w.blobs[info.Digest] = info } +// createSingleLegacyLayer writes legacy VERSION and configuration files for a single layer +func (w *Writer) createSingleLegacyLayer(layerID string, layerDigest digest.Digest, configBytes []byte) error { + // Create a symlink for the legacy format, where there is one subdirectory per layer ("image"). + // See also the comment in physicalLayerPath. + physicalLayerPath := w.physicalLayerPath(layerDigest) + if err := w.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { + return errors.Wrap(err, "Error creating layer symbolic link") + } + + b := []byte("1.0") + if err := w.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { + return errors.Wrap(err, "Error writing VERSION file") + } + + if err := w.sendBytes(filepath.Join(layerID, legacyConfigFileName), configBytes); err != nil { + return errors.Wrap(err, "Error writing config json file") + } + return nil +} + // writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte) (lastLayerID string, err error) { var chainID digest.Digest @@ -80,18 +100,6 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des // configuration). layerID := chainID.Hex() - physicalLayerPath := w.physicalLayerPath(l.Digest) - // Create a symlink for the legacy format, where there is one subdirectory per layer ("image"). 
- // See also the comment in physicalLayerPath. - if err := w.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { - return "", errors.Wrap(err, "Error creating layer symbolic link") - } - - b := []byte("1.0") - if err := w.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { - return "", errors.Wrap(err, "Error writing VERSION file") - } - // The legacy format requires a config file per layer layerConfig := make(map[string]interface{}) layerConfig["id"] = layerID @@ -111,12 +119,13 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des layerConfig[attr] = config[attr] } } - b, err := json.Marshal(layerConfig) + configBytes, err := json.Marshal(layerConfig) if err != nil { return "", errors.Wrap(err, "Error marshaling layer config") } - if err := w.sendBytes(filepath.Join(layerID, legacyConfigFileName), b); err != nil { - return "", errors.Wrap(err, "Error writing config json file") + + if err := w.createSingleLegacyLayer(layerID, l.Digest, configBytes); err != nil { + return "", err } lastLayerID = layerID From 29ddfddb96d6adbc919bface9974ffe2cc13888e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 6 Aug 2020 02:05:13 +0200 Subject: [PATCH 14/35] Move legacy layer ID computation to a bit later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... because it will soon depend on the rest of layerConfig. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/writer.go | 37 ++++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index ae36ac45ca..9dfeea6684 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -83,26 +83,8 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des var chainID digest.Digest lastLayerID = "" for i, l := range layerDescriptors { - // This chainID value matches the computation in docker/docker/layer.CreateChainID … - if chainID == "" { - chainID = l.Digest - } else { - chainID = digest.Canonical.FromString(chainID.String() + " " + l.Digest.String()) - } - // … but note that this image ID does not match docker/docker/image/v1.CreateID. At least recent - // versions allocate new IDs on load, as long as the IDs we use are unique / cannot loop. - // - // Overall, the goal of computing a digest dependent on the full history is to avoid reusing an image ID - // (and possibly creating a loop in the "parent" links) if a layer with the same DiffID appears two or more - // times in layersDescriptors. The ChainID values are sufficient for this, the v1.CreateID computation - // which also mixes in the full image configuration seems unnecessary, at least as long as we are storing - // only a single image per tarball, i.e. all DiffID prefixes are unique (can’t differ only with - // configuration). 
- layerID := chainID.Hex() - // The legacy format requires a config file per layer layerConfig := make(map[string]interface{}) - layerConfig["id"] = layerID // The root layer doesn't have any parent if lastLayerID != "" { @@ -119,6 +101,25 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des layerConfig[attr] = config[attr] } } + + // This chainID value matches the computation in docker/docker/layer.CreateChainID … + if chainID == "" { + chainID = l.Digest + } else { + chainID = digest.Canonical.FromString(chainID.String() + " " + l.Digest.String()) + } + // … but note that this image ID does not match docker/docker/image/v1.CreateID. At least recent + // versions allocate new IDs on load, as long as the IDs we use are unique / cannot loop. + // + // Overall, the goal of computing a digest dependent on the full history is to avoid reusing an image ID + // (and possibly creating a loop in the "parent" links) if a layer with the same DiffID appears two or more + // times in layersDescriptors. The ChainID values are sufficient for this, the v1.CreateID computation + // which also mixes in the full image configuration seems unnecessary, at least as long as we are storing + // only a single image per tarball, i.e. all DiffID prefixes are unique (can’t differ only with + // configuration). + layerID := chainID.Hex() + layerConfig["id"] = layerID + configBytes, err := json.Marshal(layerConfig) if err != nil { return "", errors.Wrap(err, "Error marshaling layer config") From 3bef56d9e746400e8767c9d832142a29bacadd64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 6 Aug 2020 02:33:10 +0200 Subject: [PATCH 15/35] Merge writeLegacyMetadata and createRepositoriesFile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... so that the caller does not have to care about lastLayerID and empty images. 
This preserves the current behavior of silently ignoring tags intended for empty images. That's probably not quite right, but also not a subject of this PR. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 9 +-------- docker/internal/tarfile/writer.go | 29 ++++++++++++++--------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index 9a5418703d..f811855817 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -178,17 +178,10 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest return errors.Errorf("Unsupported manifest type, need a Docker schema 2 manifest") } - lastLayerID, err := d.archive.writeLegacyLayerMetadata(man.LayersDescriptors, d.config) - if err != nil { + if err := d.archive.writeLegacyMetadata(man.LayersDescriptors, d.config, d.repoTags); err != nil { return err } - if len(man.LayersDescriptors) > 0 { - if err := d.archive.createRepositoriesFile(lastLayerID, d.repoTags); err != nil { - return err - } - } - return d.archive.ensureManifestItem(man.LayersDescriptors, man.ConfigDescriptor.Digest, d.repoTags) } diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index 9dfeea6684..8dadecc015 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -78,10 +78,10 @@ func (w *Writer) createSingleLegacyLayer(layerID string, layerDigest digest.Dige return nil } -// writeLegacyLayerMetadata writes legacy VERSION and configuration files for all layers -func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte) (lastLayerID string, err error) { +// writeLegacyMetadata writes legacy layer metadata and records tags for a single image. 
+func (w *Writer) writeLegacyMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte, repoTags []reference.NamedTagged) error { var chainID digest.Digest - lastLayerID = "" + lastLayerID := "" for i, l := range layerDescriptors { // The legacy format requires a config file per layer layerConfig := make(map[string]interface{}) @@ -95,7 +95,7 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des var config map[string]*json.RawMessage err := json.Unmarshal(configBytes, &config) if err != nil { - return "", errors.Wrap(err, "Error unmarshaling config") + return errors.Wrap(err, "Error unmarshaling config") } for _, attr := range [7]string{"architecture", "config", "container", "container_config", "created", "docker_version", "os"} { layerConfig[attr] = config[attr] @@ -122,25 +122,24 @@ func (w *Writer) writeLegacyLayerMetadata(layerDescriptors []manifest.Schema2Des configBytes, err := json.Marshal(layerConfig) if err != nil { - return "", errors.Wrap(err, "Error marshaling layer config") + return errors.Wrap(err, "Error marshaling layer config") } if err := w.createSingleLegacyLayer(layerID, l.Digest, configBytes); err != nil { - return "", err + return err } lastLayerID = layerID } - return lastLayerID, nil -} -func (w *Writer) createRepositoriesFile(rootLayerID string, repoTags []reference.NamedTagged) error { repositories := map[string]map[string]string{} - for _, repoTag := range repoTags { - if val, ok := repositories[repoTag.Name()]; ok { - val[repoTag.Tag()] = rootLayerID - } else { - repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): rootLayerID} + if lastLayerID != "" { + for _, repoTag := range repoTags { + if val, ok := repositories[repoTag.Name()]; ok { + val[repoTag.Tag()] = lastLayerID + } else { + repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): lastLayerID} + } } } @@ -260,7 +259,7 @@ func (w *Writer) configPath(configDigest digest.Digest) string { // any time. 
func (w *Writer) physicalLayerPath(layerDigest digest.Digest) string { // Note that this can't be e.g. filepath.Join(l.Digest.Hex(), legacyLayerFileName); due to the way - // writeLegacyLayerMetadata constructs layer IDs differently from inputinfo.Digest values (as described + // writeLegacyMetadata constructs layer IDs differently from inputinfo.Digest values (as described // inside it), most of the layers would end up in subdirectories alone without any metadata; (docker load) // tries to load every subdirectory as an image and fails if the config is missing. So, keep the layers // in the root of the tarball. From 075bc068fc5a39b3a3651293e55456a8c9fa0682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 21:34:00 +0200 Subject: [PATCH 16/35] Implement writing multiple images in the legacy format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that both the legacy and non-legacy format can incrementally add images to a single archive, this will allow creating multiple Destinations from a writer Writer. Image IDs are now generated differently; that may be observable by very old Docker versions. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/writer.go | 82 ++++++++++++++++++------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index 8dadecc015..0cff3a4caa 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -24,6 +24,8 @@ type Writer struct { tar *tar.Writer // Other state. blobs map[digest.Digest]types.BlobInfo // list of already-sent blobs + repositories map[string]map[string]string + legacyLayers map[string]struct{} // A set of IDs of legacy layers that have been already sent. manifest []ManifestItem manifestByConfig map[digest.Digest]int // A map from config digest to an entry index in manifest above. 
} @@ -35,6 +37,8 @@ func NewWriter(dest io.Writer) *Writer { writer: dest, tar: tar.NewWriter(dest), blobs: make(map[digest.Digest]types.BlobInfo), + repositories: map[string]map[string]string{}, + legacyLayers: map[string]struct{}{}, manifestByConfig: map[digest.Digest]int{}, } } @@ -58,22 +62,26 @@ func (w *Writer) recordBlob(info types.BlobInfo) { w.blobs[info.Digest] = info } -// createSingleLegacyLayer writes legacy VERSION and configuration files for a single layer -func (w *Writer) createSingleLegacyLayer(layerID string, layerDigest digest.Digest, configBytes []byte) error { - // Create a symlink for the legacy format, where there is one subdirectory per layer ("image"). - // See also the comment in physicalLayerPath. - physicalLayerPath := w.physicalLayerPath(layerDigest) - if err := w.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { - return errors.Wrap(err, "Error creating layer symbolic link") - } +// ensureSingleLegacyLayer writes legacy VERSION and configuration files for a single layer +func (w *Writer) ensureSingleLegacyLayer(layerID string, layerDigest digest.Digest, configBytes []byte) error { + if _, ok := w.legacyLayers[layerID]; !ok { + // Create a symlink for the legacy format, where there is one subdirectory per layer ("image"). + // See also the comment in physicalLayerPath. 
+ physicalLayerPath := w.physicalLayerPath(layerDigest) + if err := w.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { + return errors.Wrap(err, "Error creating layer symbolic link") + } - b := []byte("1.0") - if err := w.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { - return errors.Wrap(err, "Error writing VERSION file") - } + b := []byte("1.0") + if err := w.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { + return errors.Wrap(err, "Error writing VERSION file") + } - if err := w.sendBytes(filepath.Join(layerID, legacyConfigFileName), configBytes); err != nil { - return errors.Wrap(err, "Error writing config json file") + if err := w.sendBytes(filepath.Join(layerID, legacyConfigFileName), configBytes); err != nil { + return errors.Wrap(err, "Error writing config json file") + } + + w.legacyLayers[layerID] = struct{}{} } return nil } @@ -108,16 +116,21 @@ func (w *Writer) writeLegacyMetadata(layerDescriptors []manifest.Schema2Descript } else { chainID = digest.Canonical.FromString(chainID.String() + " " + l.Digest.String()) } - // … but note that this image ID does not match docker/docker/image/v1.CreateID. At least recent - // versions allocate new IDs on load, as long as the IDs we use are unique / cannot loop. + // … but note that the image ID does not _exactly_ match docker/docker/image/v1.CreateID, primarily because + // we create the image configs differently in details. At least recent versions allocate new IDs on load, + // so this is fine as long as the IDs we use are unique / cannot loop. // - // Overall, the goal of computing a digest dependent on the full history is to avoid reusing an image ID - // (and possibly creating a loop in the "parent" links) if a layer with the same DiffID appears two or more - // times in layersDescriptors. 
The ChainID values are sufficient for this, the v1.CreateID computation - // which also mixes in the full image configuration seems unnecessary, at least as long as we are storing - // only a single image per tarball, i.e. all DiffID prefixes are unique (can’t differ only with - // configuration). - layerID := chainID.Hex() + // For intermediate images, we could just use the chainID as an image ID, but using a digest of ~the created + // config makes sure that everything uses the same “namespace”; a bit less efficient but clearer. + // + // Temporarily add the chainID to the config, only for the purpose of generating the image ID. + layerConfig["layer_id"] = chainID + b, err := json.Marshal(layerConfig) // Note that layerConfig["id"] is not set yet at this point. + if err != nil { + return errors.Wrap(err, "Error marshaling layer config") + } + delete(layerConfig, "layer_id") + layerID := digest.Canonical.FromBytes(b).Hex() layerConfig["id"] = layerID configBytes, err := json.Marshal(layerConfig) @@ -125,31 +138,22 @@ func (w *Writer) writeLegacyMetadata(layerDescriptors []manifest.Schema2Descript return errors.Wrap(err, "Error marshaling layer config") } - if err := w.createSingleLegacyLayer(layerID, l.Digest, configBytes); err != nil { + if err := w.ensureSingleLegacyLayer(layerID, l.Digest, configBytes); err != nil { return err } lastLayerID = layerID } - repositories := map[string]map[string]string{} if lastLayerID != "" { for _, repoTag := range repoTags { - if val, ok := repositories[repoTag.Name()]; ok { + if val, ok := w.repositories[repoTag.Name()]; ok { val[repoTag.Tag()] = lastLayerID } else { - repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): lastLayerID} + w.repositories[repoTag.Name()] = map[string]string{repoTag.Tag(): lastLayerID} } } } - - b, err := json.Marshal(repositories) - if err != nil { - return errors.Wrap(err, "Error marshaling repositories") - } - if err := w.sendBytes(legacyRepositoriesFileName, b); err != nil { - return 
errors.Wrap(err, "Error writing config json file") - } return nil } @@ -243,6 +247,14 @@ func (w *Writer) Close() error { return err } + b, err = json.Marshal(w.repositories) + if err != nil { + return errors.Wrap(err, "Error marshaling repositories") + } + if err := w.sendBytes(legacyRepositoriesFileName, b); err != nil { + return errors.Wrap(err, "Error writing config json file") + } + return w.tar.Close() } From c81a53643cdcf7404785d8556fb36cae3b7787a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 21:46:15 +0200 Subject: [PATCH 17/35] Separate tarfile.Writer creation from Destination creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least docker/archive will need to deal with the two optionally separately, so such a separate constructor must exist; and the other callers are not much more complex if they separate it as well. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/archive/dest.go | 7 +++++-- docker/daemon/daemon_dest.go | 7 +++++-- docker/internal/tarfile/dest.go | 12 +++--------- docker/tarfile/dest.go | 9 +++++++-- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/docker/archive/dest.go b/docker/archive/dest.go index 12469ed963..20a525408c 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -13,6 +13,7 @@ import ( type archiveImageDestination struct { *tarfile.Destination // Implements most of types.ImageDestination ref archiveReference + archive *tarfile.Writer writer io.Closer } @@ -36,13 +37,15 @@ func newImageDestination(sys *types.SystemContext, ref archiveReference) (types. 
return nil, errors.New("docker-archive doesn't support modifying existing images") } - tarDest := tarfile.NewDestination(sys, fh, ref.destinationRef) + archive := tarfile.NewWriter(fh) + tarDest := tarfile.NewDestination(sys, archive, ref.destinationRef) if sys != nil && sys.DockerArchiveAdditionalTags != nil { tarDest.AddRepoTags(sys.DockerArchiveAdditionalTags) } return &archiveImageDestination{ Destination: tarDest, ref: ref, + archive: archive, writer: fh, }, nil } @@ -68,5 +71,5 @@ func (d *archiveImageDestination) Close() error { // - Uploaded data MAY be visible to others before Commit() is called // - Uploaded data MAY be removed or MAY remain around if Close() is called without Commit() (i.e. rollback is allowed but not guaranteed) func (d *archiveImageDestination) Commit(ctx context.Context, unparsedToplevel types.UnparsedImage) error { - return d.Destination.Commit(ctx) + return d.archive.Close() } diff --git a/docker/daemon/daemon_dest.go b/docker/daemon/daemon_dest.go index 1a7a1dc8d3..88609b3dc7 100644 --- a/docker/daemon/daemon_dest.go +++ b/docker/daemon/daemon_dest.go @@ -16,6 +16,7 @@ type daemonImageDestination struct { ref daemonReference mustMatchRuntimeOS bool *tarfile.Destination // Implements most of types.ImageDestination + archive *tarfile.Writer // For talking to imageLoadGoroutine goroutineCancel context.CancelFunc statusChannel <-chan error @@ -45,6 +46,7 @@ func newImageDestination(ctx context.Context, sys *types.SystemContext, ref daem } reader, writer := io.Pipe() + archive := tarfile.NewWriter(writer) // Commit() may never be called, so we may never read from this channel; so, make this buffered to allow imageLoadGoroutine to write status and terminate even if we never read it. 
statusChannel := make(chan error, 1) @@ -54,7 +56,8 @@ func newImageDestination(ctx context.Context, sys *types.SystemContext, ref daem return &daemonImageDestination{ ref: ref, mustMatchRuntimeOS: mustMatchRuntimeOS, - Destination: tarfile.NewDestination(sys, writer, namedTaggedRef), + Destination: tarfile.NewDestination(sys, archive, namedTaggedRef), + archive: archive, goroutineCancel: goroutineCancel, statusChannel: statusChannel, writer: writer, @@ -130,7 +133,7 @@ func (d *daemonImageDestination) Reference() types.ImageReference { // - Uploaded data MAY be removed or MAY remain around if Close() is called without Commit() (i.e. rollback is allowed but not guaranteed) func (d *daemonImageDestination) Commit(ctx context.Context, unparsedToplevel types.UnparsedImage) error { logrus.Debugf("docker-daemon: Closing tar stream") - if err := d.Destination.Commit(ctx); err != nil { + if err := d.archive.Close(); err != nil { return err } if err := d.writer.Close(); err != nil { diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index f811855817..299fd4acdd 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -27,14 +27,14 @@ type Destination struct { sysCtx *types.SystemContext } -// NewDestination returns a tarfile.Destination for the specified io.Writer. -func NewDestination(sys *types.SystemContext, dest io.Writer, ref reference.NamedTagged) *Destination { +// NewDestination returns a tarfile.Destination adding images to the specified Writer. 
+func NewDestination(sys *types.SystemContext, archive *Writer, ref reference.NamedTagged) *Destination { repoTags := []reference.NamedTagged{} if ref != nil { repoTags = append(repoTags, ref) } return &Destination{ - archive: NewWriter(dest), + archive: archive, repoTags: repoTags, sysCtx: sys, } @@ -197,9 +197,3 @@ func (d *Destination) PutSignatures(ctx context.Context, signatures [][]byte, in } return nil } - -// Commit finishes writing data to the underlying io.Writer. -// It is the caller's responsibility to close it, if necessary. -func (d *Destination) Commit(ctx context.Context) error { - return d.archive.Close() -} diff --git a/docker/tarfile/dest.go b/docker/tarfile/dest.go index cffb497f59..af1690683f 100644 --- a/docker/tarfile/dest.go +++ b/docker/tarfile/dest.go @@ -13,6 +13,7 @@ import ( // Destination is a partial implementation of types.ImageDestination for writing to an io.Writer. type Destination struct { internal *internal.Destination + archive *internal.Writer } // NewDestination returns a tarfile.Destination for the specified io.Writer. @@ -23,7 +24,11 @@ func NewDestination(dest io.Writer, ref reference.NamedTagged) *Destination { // NewDestinationWithContext returns a tarfile.Destination for the specified io.Writer. func NewDestinationWithContext(sys *types.SystemContext, dest io.Writer, ref reference.NamedTagged) *Destination { - return &Destination{internal: internal.NewDestination(sys, dest, ref)} + archive := internal.NewWriter(dest) + return &Destination{ + internal: internal.NewDestination(sys, archive, ref), + archive: archive, + } } // AddRepoTags adds the specified tags to the destination's repoTags. @@ -108,5 +113,5 @@ func (d *Destination) PutSignatures(ctx context.Context, signatures [][]byte, in // Commit finishes writing data to the underlying io.Writer. // It is the caller's responsibility to close it, if necessary. 
func (d *Destination) Commit(ctx context.Context) error { - return d.internal.Commit(ctx) + return d.archive.Close() } From 28fea182abc83a4b868a10ce0b7c00874ce07118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 7 Aug 2020 01:07:09 +0200 Subject: [PATCH 18/35] Lock docker/internal/tarfile.Writer to support concurrent uses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This does not really allow _concurrency_ because we are streaming the data to a single io.Writer, but it gives us safety against concurrent callers (hypothetically when copying multiple images, at least). Note that we do not currently set HasThreadSafePutBlob, although we could, because the benefit is probably fairly small (basically it would parallelize creating on-disk copies of streamed inputs with unknown size or digest); that might be a wrong guess. Also adds a sanity check against using the Writer after Finish(). Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/dest.go | 32 +++++++++--- docker/internal/tarfile/writer.go | 86 ++++++++++++++++++++++--------- 2 files changed, 87 insertions(+), 31 deletions(-) diff --git a/docker/internal/tarfile/dest.go b/docker/internal/tarfile/dest.go index 299fd4acdd..8c38094cfa 100644 --- a/docker/internal/tarfile/dest.go +++ b/docker/internal/tarfile/dest.go @@ -79,6 +79,9 @@ func (d *Destination) IgnoresEmbeddedDockerReference() bool { // HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently. func (d *Destination) HasThreadSafePutBlob() bool { + // The code _is_ actually thread-safe, but apart from computing sizes/digests of layers where + // this is unknown in advance, the actual copy is serialized by d.archive, so there probably isn’t + // much benefit from concurrency, mostly just extra CPU, memory and I/O contention. return false } @@ -120,8 +123,13 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t logrus.Debugf("... 
streaming done") } + if err := d.archive.lock(); err != nil { + return types.BlobInfo{}, err + } + defer d.archive.unlock() + // Maybe the blob has been already sent - ok, reusedInfo, err := d.archive.tryReusingBlob(inputInfo) + ok, reusedInfo, err := d.archive.tryReusingBlobLocked(inputInfo) if err != nil { return types.BlobInfo{}, err } @@ -135,15 +143,15 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t return types.BlobInfo{}, errors.Wrap(err, "Error reading Config file stream") } d.config = buf - if err := d.archive.sendFile(d.archive.configPath(inputInfo.Digest), inputInfo.Size, bytes.NewReader(buf)); err != nil { + if err := d.archive.sendFileLocked(d.archive.configPath(inputInfo.Digest), inputInfo.Size, bytes.NewReader(buf)); err != nil { return types.BlobInfo{}, errors.Wrap(err, "Error writing Config file") } } else { - if err := d.archive.sendFile(d.archive.physicalLayerPath(inputInfo.Digest), inputInfo.Size, stream); err != nil { + if err := d.archive.sendFileLocked(d.archive.physicalLayerPath(inputInfo.Digest), inputInfo.Size, stream); err != nil { return types.BlobInfo{}, err } } - d.archive.recordBlob(types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size}) + d.archive.recordBlobLocked(types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size}) return types.BlobInfo{Digest: inputInfo.Digest, Size: inputInfo.Size}, nil } @@ -155,7 +163,12 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure. // May use and/or update cache. 
func (d *Destination) TryReusingBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) { - return d.archive.tryReusingBlob(info) + if err := d.archive.lock(); err != nil { + return false, types.BlobInfo{}, err + } + defer d.archive.unlock() + + return d.archive.tryReusingBlobLocked(info) } // PutManifest writes manifest to the destination. @@ -178,11 +191,16 @@ func (d *Destination) PutManifest(ctx context.Context, m []byte, instanceDigest return errors.Errorf("Unsupported manifest type, need a Docker schema 2 manifest") } - if err := d.archive.writeLegacyMetadata(man.LayersDescriptors, d.config, d.repoTags); err != nil { + if err := d.archive.lock(); err != nil { + return err + } + defer d.archive.unlock() + + if err := d.archive.writeLegacyMetadataLocked(man.LayersDescriptors, d.config, d.repoTags); err != nil { return err } - return d.archive.ensureManifestItem(man.LayersDescriptors, man.ConfigDescriptor.Digest, d.repoTags) + return d.archive.ensureManifestItemLocked(man.LayersDescriptors, man.ConfigDescriptor.Digest, d.repoTags) } // PutSignatures would add the given signatures to the docker tarfile (currently not supported). diff --git a/docker/internal/tarfile/writer.go b/docker/internal/tarfile/writer.go index 0cff3a4caa..fd2c461d07 100644 --- a/docker/internal/tarfile/writer.go +++ b/docker/internal/tarfile/writer.go @@ -8,6 +8,7 @@ import ( "io" "os" "path/filepath" + "sync" "time" "github.com/containers/image/v5/docker/reference" @@ -20,8 +21,11 @@ import ( // Writer allows creating a (docker save)-formatted tar archive containing one or more images. type Writer struct { + mutex sync.Mutex + // ALL of the following members can only be accessed with the mutex held. + // Use Writer.lock() to obtain the mutex. writer io.Writer - tar *tar.Writer + tar *tar.Writer // nil if the Writer has already been closed. // Other state. 
	blobs        map[digest.Digest]types.BlobInfo // list of already-sent blobs
 	repositories map[string]map[string]string
@@ -43,11 +47,30 @@ func NewWriter(dest io.Writer) *Writer {
 	}
 }
 
-// tryReusingBlob checks whether the transport already contains, a blob, and if so, returns its metadata.
+// lock does some sanity checks and locks the Writer.
+// If this function succeeds, the caller must call w.unlock.
+// Do not use Writer.mutex directly.
+func (w *Writer) lock() error {
+	w.mutex.Lock()
+	if w.tar == nil {
+		w.mutex.Unlock()
+		return errors.New("Internal error: trying to use an already closed tarfile.Writer")
+	}
+	return nil
+}
+
+// unlock releases the lock obtained by Writer.lock
+// Do not use Writer.mutex directly.
+func (w *Writer) unlock() {
+	w.mutex.Unlock()
+}
+
+// tryReusingBlobLocked checks whether the transport already contains a blob, and if so, returns its metadata.
 // info.Digest must not be empty.
 // If the blob has been succesfully reused, returns (true, info, nil); info must contain at least a digest and size.
 // If the transport can not reuse the requested blob, tryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
-func (w *Writer) tryReusingBlob(info types.BlobInfo) (bool, types.BlobInfo, error) {
+// The caller must have locked the Writer.
+func (w *Writer) tryReusingBlobLocked(info types.BlobInfo) (bool, types.BlobInfo, error) {
 	if info.Digest == "" {
 		return false, types.BlobInfo{}, errors.Errorf("Can not check for a blob with unknown digest")
 	}
@@ -58,26 +81,28 @@ func (w *Writer) tryReusingBlob(info types.BlobInfo) (bool, types.BlobInfo, erro
 }
 
 // recordBlob records metadata of a recorded blob, which must contain at least a digest and size.
-func (w *Writer) recordBlob(info types.BlobInfo) {
+// The caller must have locked the Writer.
+func (w *Writer) recordBlobLocked(info types.BlobInfo) { w.blobs[info.Digest] = info } -// ensureSingleLegacyLayer writes legacy VERSION and configuration files for a single layer -func (w *Writer) ensureSingleLegacyLayer(layerID string, layerDigest digest.Digest, configBytes []byte) error { +// ensureSingleLegacyLayerLocked writes legacy VERSION and configuration files for a single layer +// The caller must have locked the Writer. +func (w *Writer) ensureSingleLegacyLayerLocked(layerID string, layerDigest digest.Digest, configBytes []byte) error { if _, ok := w.legacyLayers[layerID]; !ok { // Create a symlink for the legacy format, where there is one subdirectory per layer ("image"). // See also the comment in physicalLayerPath. physicalLayerPath := w.physicalLayerPath(layerDigest) - if err := w.sendSymlink(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { + if err := w.sendSymlinkLocked(filepath.Join(layerID, legacyLayerFileName), filepath.Join("..", physicalLayerPath)); err != nil { return errors.Wrap(err, "Error creating layer symbolic link") } b := []byte("1.0") - if err := w.sendBytes(filepath.Join(layerID, legacyVersionFileName), b); err != nil { + if err := w.sendBytesLocked(filepath.Join(layerID, legacyVersionFileName), b); err != nil { return errors.Wrap(err, "Error writing VERSION file") } - if err := w.sendBytes(filepath.Join(layerID, legacyConfigFileName), configBytes); err != nil { + if err := w.sendBytesLocked(filepath.Join(layerID, legacyConfigFileName), configBytes); err != nil { return errors.Wrap(err, "Error writing config json file") } @@ -86,8 +111,8 @@ func (w *Writer) ensureSingleLegacyLayer(layerID string, layerDigest digest.Dige return nil } -// writeLegacyMetadata writes legacy layer metadata and records tags for a single image. 
-func (w *Writer) writeLegacyMetadata(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte, repoTags []reference.NamedTagged) error { +// writeLegacyMetadataLocked writes legacy layer metadata and records tags for a single image. +func (w *Writer) writeLegacyMetadataLocked(layerDescriptors []manifest.Schema2Descriptor, configBytes []byte, repoTags []reference.NamedTagged) error { var chainID digest.Digest lastLayerID := "" for i, l := range layerDescriptors { @@ -138,7 +163,7 @@ func (w *Writer) writeLegacyMetadata(layerDescriptors []manifest.Schema2Descript return errors.Wrap(err, "Error marshaling layer config") } - if err := w.ensureSingleLegacyLayer(layerID, l.Digest, configBytes); err != nil { + if err := w.ensureSingleLegacyLayerLocked(layerID, l.Digest, configBytes); err != nil { return err } @@ -176,8 +201,9 @@ func checkManifestItemsMatch(a, b *ManifestItem) error { return nil } -// ensureManifestItem ensures that there is a manifest item pointing to (layerDescriptors, configDigest) with repoTags -func (w *Writer) ensureManifestItem(layerDescriptors []manifest.Schema2Descriptor, configDigest digest.Digest, repoTags []reference.NamedTagged) error { +// ensureManifestItemLocked ensures that there is a manifest item pointing to (layerDescriptors, configDigest) with repoTags +// The caller must have locked the Writer. +func (w *Writer) ensureManifestItemLocked(layerDescriptors []manifest.Schema2Descriptor, configDigest digest.Digest, repoTags []reference.NamedTagged) error { layerPaths := []string{} for _, l := range layerDescriptors { layerPaths = append(layerPaths, w.physicalLayerPath(l.Digest)) @@ -239,11 +265,16 @@ func (w *Writer) ensureManifestItem(layerDescriptors []manifest.Schema2Descripto // to the underlying io.Writer. // No more images can be added after this is called. 
func (w *Writer) Close() error { + if err := w.lock(); err != nil { + return err + } + defer w.unlock() + b, err := json.Marshal(&w.manifest) if err != nil { return err } - if err := w.sendBytes(manifestFileName, b); err != nil { + if err := w.sendBytesLocked(manifestFileName, b); err != nil { return err } @@ -251,11 +282,15 @@ func (w *Writer) Close() error { if err != nil { return errors.Wrap(err, "Error marshaling repositories") } - if err := w.sendBytes(legacyRepositoriesFileName, b); err != nil { + if err := w.sendBytesLocked(legacyRepositoriesFileName, b); err != nil { return errors.Wrap(err, "Error writing config json file") } - return w.tar.Close() + if err := w.tar.Close(); err != nil { + return err + } + w.tar = nil // Mark the Writer as closed. + return nil } // configPath returns a path we choose for storing a config with the specified digest. @@ -306,8 +341,9 @@ func (t *tarFI) Sys() interface{} { return nil } -// sendSymlink sends a symlink into the tar stream. -func (w *Writer) sendSymlink(path string, target string) error { +// sendSymlinkLocked sends a symlink into the tar stream. +// The caller must have locked the Writer. +func (w *Writer) sendSymlinkLocked(path string, target string) error { hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: 0, isSymlink: true}, target) if err != nil { return nil @@ -316,13 +352,15 @@ func (w *Writer) sendSymlink(path string, target string) error { return w.tar.WriteHeader(hdr) } -// sendBytes sends a path into the tar stream. -func (w *Writer) sendBytes(path string, b []byte) error { - return w.sendFile(path, int64(len(b)), bytes.NewReader(b)) +// sendBytesLocked sends a path into the tar stream. +// The caller must have locked the Writer. +func (w *Writer) sendBytesLocked(path string, b []byte) error { + return w.sendFileLocked(path, int64(len(b)), bytes.NewReader(b)) } -// sendFile sends a file into the tar stream. 
-func (w *Writer) sendFile(path string, expectedSize int64, stream io.Reader) error { +// sendFileLocked sends a file into the tar stream. +// The caller must have locked the Writer. +func (w *Writer) sendFileLocked(path string, expectedSize int64, stream io.Reader) error { hdr, err := tar.FileInfoHeader(&tarFI{path: path, size: expectedSize}, "") if err != nil { return nil From dbc2073180d49eb01ed8033e64b4189c8cff40fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 7 Aug 2020 01:16:49 +0200 Subject: [PATCH 19/35] Split openArchiveForWriting from docker/archive/newImageDestination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add another caller in the next commit. Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/archive/dest.go | 20 ++------------------ docker/archive/writer.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 18 deletions(-) create mode 100644 docker/archive/writer.go diff --git a/docker/archive/dest.go b/docker/archive/dest.go index 20a525408c..acebd90818 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -3,11 +3,9 @@ package archive import ( "context" "io" - "os" "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/types" - "github.com/pkg/errors" ) type archiveImageDestination struct { @@ -18,23 +16,9 @@ type archiveImageDestination struct { } func newImageDestination(sys *types.SystemContext, ref archiveReference) (types.ImageDestination, error) { - // ref.path can be either a pipe or a regular file - // in the case of a pipe, we require that we can open it for write - // in the case of a regular file, we don't want to overwrite any pre-existing file - // so we check for Size() == 0 below (This is racy, but using O_EXCL would also be racy, - // only in a different way. 
Either way, it’s up to the user to not have two writers to the same path.) - fh, err := os.OpenFile(ref.path, os.O_WRONLY|os.O_CREATE, 0644) + fh, err := openArchiveForWriting(ref.path) if err != nil { - return nil, errors.Wrapf(err, "error opening file %q", ref.path) - } - - fhStat, err := fh.Stat() - if err != nil { - return nil, errors.Wrapf(err, "error statting file %q", ref.path) - } - - if fhStat.Mode().IsRegular() && fhStat.Size() != 0 { - return nil, errors.New("docker-archive doesn't support modifying existing images") + return nil, err } archive := tarfile.NewWriter(fh) diff --git a/docker/archive/writer.go b/docker/archive/writer.go new file mode 100644 index 0000000000..4f9d9d77a8 --- /dev/null +++ b/docker/archive/writer.go @@ -0,0 +1,38 @@ +package archive + +import ( + "os" + + "github.com/pkg/errors" +) + +// openArchiveForWriting opens path for writing a tar archive, +// making a few sanity checks. +func openArchiveForWriting(path string) (*os.File, error) { + // path can be either a pipe or a regular file + // in the case of a pipe, we require that we can open it for write + // in the case of a regular file, we don't want to overwrite any pre-existing file + // so we check for Size() == 0 below (This is racy, but using O_EXCL would also be racy, + // only in a different way. Either way, it’s up to the user to not have two writers to the same path.) 
+ fh, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return nil, errors.Wrapf(err, "error opening file %q", path) + } + succeeded := false + defer func() { + if !succeeded { + fh.Close() + } + }() + fhStat, err := fh.Stat() + if err != nil { + return nil, errors.Wrapf(err, "error statting file %q", path) + } + + if fhStat.Mode().IsRegular() && fhStat.Size() != 0 { + return nil, errors.New("docker-archive doesn't support modifying existing images") + } + + succeeded = true + return fh, nil +} From 611759b2179287db11129801cefe970d27cfbf1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 22:04:03 +0200 Subject: [PATCH 20/35] Finally, introduce docker/archive.Writer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... which allows creating ImageReferece objects that all write to a shared tarfile.Writer. Signed-off-by: Miloslav Trmač --- docker/archive/dest.go | 34 +++++++++++++++++++--------- docker/archive/transport.go | 12 +++++++++- docker/archive/writer.go | 44 +++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/docker/archive/dest.go b/docker/archive/dest.go index acebd90818..1ead9c57b5 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -11,17 +11,25 @@ import ( type archiveImageDestination struct { *tarfile.Destination // Implements most of types.ImageDestination ref archiveReference - archive *tarfile.Writer - writer io.Closer + archive *tarfile.Writer // Should only be closed if writer != nil + writer io.Closer // May be nil if the archive is shared } func newImageDestination(sys *types.SystemContext, ref archiveReference) (types.ImageDestination, error) { - fh, err := openArchiveForWriting(ref.path) - if err != nil { - return nil, err - } + var archive *tarfile.Writer + var writer io.Closer + if ref.archiveWriter != nil { + archive = ref.archiveWriter + writer = nil + } else { + fh, err 
:= openArchiveForWriting(ref.path) + if err != nil { + return nil, err + } - archive := tarfile.NewWriter(fh) + archive = tarfile.NewWriter(fh) + writer = fh + } tarDest := tarfile.NewDestination(sys, archive, ref.destinationRef) if sys != nil && sys.DockerArchiveAdditionalTags != nil { tarDest.AddRepoTags(sys.DockerArchiveAdditionalTags) @@ -30,7 +38,7 @@ func newImageDestination(sys *types.SystemContext, ref archiveReference) (types. Destination: tarDest, ref: ref, archive: archive, - writer: fh, + writer: writer, }, nil } @@ -47,7 +55,10 @@ func (d *archiveImageDestination) Reference() types.ImageReference { // Close removes resources associated with an initialized ImageDestination, if any. func (d *archiveImageDestination) Close() error { - return d.writer.Close() + if d.writer != nil { + return d.writer.Close() + } + return nil } // Commit marks the process of storing the image as successful and asks for the image to be persisted. @@ -55,5 +66,8 @@ func (d *archiveImageDestination) Close() error { // - Uploaded data MAY be visible to others before Commit() is called // - Uploaded data MAY be removed or MAY remain around if Close() is called without Commit() (i.e. 
rollback is allowed but not guaranteed) func (d *archiveImageDestination) Commit(ctx context.Context, unparsedToplevel types.UnparsedImage) error { - return d.archive.Close() + if d.writer != nil { + return d.archive.Close() + } + return nil } diff --git a/docker/archive/transport.go b/docker/archive/transport.go index 26bc687e00..08c077701f 100644 --- a/docker/archive/transport.go +++ b/docker/archive/transport.go @@ -5,6 +5,7 @@ import ( "fmt" "strings" + "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/docker/reference" ctrImage "github.com/containers/image/v5/image" "github.com/containers/image/v5/transports" @@ -45,6 +46,8 @@ type archiveReference struct { // only used for destinations, // archiveReference.destinationRef is optional and can be nil for destinations as well. destinationRef reference.NamedTagged + // If not nil, must have been created for path + archiveWriter *tarfile.Writer } // ParseReference converts a string, which should not start with the ImageTransport.Name prefix, into an Docker ImageReference. @@ -75,8 +78,14 @@ func ParseReference(refString string) (types.ImageReference, error) { return NewReference(path, destinationRef) } -// NewReference rethrns a Docker archive reference for a path and an optional destination reference. +// NewReference returns a Docker archive reference for a path and an optional destination reference. func NewReference(path string, destinationRef reference.NamedTagged) (types.ImageReference, error) { + return newReference(path, destinationRef, nil) +} + +// newReference returns a docker archive reference for a path, an optional reference or sourceIndex, +// and optionally a tarfile.Writer matching path. 
+func newReference(path string, destinationRef reference.NamedTagged, archiveWriter *tarfile.Writer) (types.ImageReference, error) { if strings.Contains(path, ":") { return nil, errors.Errorf("Invalid docker-archive: reference: colon in path %q is not supported", path) } @@ -86,6 +95,7 @@ func NewReference(path string, destinationRef reference.NamedTagged) (types.Imag return archiveReference{ path: path, destinationRef: destinationRef, + archiveWriter: archiveWriter, }, nil } diff --git a/docker/archive/writer.go b/docker/archive/writer.go index 4f9d9d77a8..b54c5ddd36 100644 --- a/docker/archive/writer.go +++ b/docker/archive/writer.go @@ -1,11 +1,55 @@ package archive import ( + "io" "os" + "github.com/containers/image/v5/docker/internal/tarfile" + "github.com/containers/image/v5/docker/reference" + "github.com/containers/image/v5/types" "github.com/pkg/errors" ) +// Writer manages a single in-progress Docker archive and allows adding images to it. +type Writer struct { + path string // The original, user-specified path; not the maintained temporary file, if any + archive *tarfile.Writer + writer io.Closer +} + +// NewWriter returns a Writer for path. +// The caller should call .Close() on the returned object. +func NewWriter(sys *types.SystemContext, path string) (*Writer, error) { + fh, err := openArchiveForWriting(path) + if err != nil { + return nil, err + } + archive := tarfile.NewWriter(fh) + + return &Writer{ + path: path, + archive: archive, + writer: fh, + }, nil +} + +// Close writes all outstanding data about images to the archive, and +// releases state associated with the Writer, if any. +// No more images can be added after this is called. +func (w *Writer) Close() error { + err := w.archive.Close() + if err2 := w.writer.Close(); err2 != nil && err == nil { + err = err2 + } + return err +} + +// NewReference returns an ImageReference that allows adding an image to Writer, +// with an optional reference. 
+func (w *Writer) NewReference(destinationRef reference.NamedTagged) (types.ImageReference, error) { + return newReference(w.path, destinationRef, w.archive) +} + // openArchiveForWriting opens path for writing a tar archive, // making a few sanity checks. func openArchiveForWriting(path string) (*os.File, error) { From 3c9322ddf7ba72a3d38902af449f9aa1ab9304be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 28 Jul 2020 15:09:12 +0200 Subject: [PATCH 21/35] Fix an error message on docker-archive:path:name@sha256:$digest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a possible user error, not a supposedly-unreachable internal error. Signed-off-by: Miloslav Trmač --- docker/archive/transport.go | 5 ++--- go.sum | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/archive/transport.go b/docker/archive/transport.go index 26bc687e00..4c35dc290e 100644 --- a/docker/archive/transport.go +++ b/docker/archive/transport.go @@ -65,9 +65,8 @@ func ParseReference(refString string) (types.ImageReference, error) { } ref = reference.TagNameOnly(ref) refTagged, isTagged := ref.(reference.NamedTagged) - if !isTagged { - // Really shouldn't be hit... 
- return nil, errors.Errorf("internal error: reference is not tagged even after reference.TagNameOnly: %s", refString) + if !isTagged { // If ref contains a digest, TagNameOnly does not change it + return nil, errors.Errorf("reference does not include a tag: %s", ref.String()) } destinationRef = refTagged } diff --git a/go.sum b/go.sum index a41fa5bfa0..d464655f0e 100644 --- a/go.sum +++ b/go.sum @@ -107,6 +107,7 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= @@ -399,6 +400,7 @@ google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873 h1:nfPFGzJkUDX6uBm google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.23.1 h1:q4XQuHFC6I28BKZpo6IYyb3mNO+l7lSOxRuYTCiDfXk= google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.24.0 h1:vb/1TCsVn3DcJlQ0Gs1yB1pKI6Do2/QNwxdKqmc/b0s= google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= From 0d0ea66f76fe7a99eb81200c89a83d308f69108e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= 
Date: Sat, 25 Jul 2020 20:27:06 +0200 Subject: [PATCH 22/35] Remove an obsolete FIXME. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should not change behavior. Signed-off-by: Miloslav Trmač --- docker/tarfile/src.go | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index f04fc16cd1..77f4b0a862 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -89,7 +89,6 @@ func NewSourceFromStream(inputStream io.Reader) (*Source, error) { // which can be either compressed or uncompressed. The caller can close the // inputStream immediately after NewSourceFromFile returns. func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { - // FIXME: use SystemContext here. // Save inputStream to a temporary file tarCopyFile, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(sys), "docker-tar") if err != nil { From f358587911862a53293d776dfa8d1c5bac682f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 19:19:02 +0200 Subject: [PATCH 23/35] Move docker/tarfile.Source to docker/internal/tarfile.Source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... so that we can add more functionality to it without making it public API. Keep the original docker/tarfile.Source as a compatibility shim. ManifestItem is moved to the private package to avoid an import cycle, but remains visible. docker/tarfile/src_test.go should logically be moved as well, but we can't do that yet due to the import cycle. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/src.go | 511 +++++++++++++++++++++++++++++++ docker/internal/tarfile/types.go | 24 ++ docker/tarfile/src.go | 441 +------------------------- docker/tarfile/types.go | 13 +- 4 files changed, 553 insertions(+), 436 deletions(-) create mode 100644 docker/internal/tarfile/src.go create mode 100644 docker/internal/tarfile/types.go diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go new file mode 100644 index 0000000000..77f4b0a862 --- /dev/null +++ b/docker/internal/tarfile/src.go @@ -0,0 +1,511 @@ +package tarfile + +import ( + "archive/tar" + "bytes" + "context" + "encoding/json" + "io" + "io/ioutil" + "os" + "path" + "sync" + + "github.com/containers/image/v5/internal/iolimits" + "github.com/containers/image/v5/internal/tmpdir" + "github.com/containers/image/v5/manifest" + "github.com/containers/image/v5/pkg/compression" + "github.com/containers/image/v5/types" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" +) + +// Source is a partial implementation of types.ImageSource for reading from tarPath. +type Source struct { + tarPath string + removeTarPathOnClose bool // Remove temp file on close if true + // The following data is only available after ensureCachedDataIsPresent() succeeds + tarManifest *ManifestItem // nil if not available yet. + configBytes []byte + configDigest digest.Digest + orderedDiffIDList []digest.Digest + knownLayers map[digest.Digest]*layerInfo + // Other state + generatedManifest []byte // Private cache for GetManifest(), nil if not set yet. 
+	cacheDataLock        sync.Once     // Private state for ensureCachedDataIsPresent to make it concurrency-safe
+	cacheDataResult      error         // Private state for ensureCachedDataIsPresent
+}
+
+type layerInfo struct {
+	path string
+	size int64
+}
+
+// TODO: We could add support for multiple images in a single archive, so
+// that people could use docker-archive:opensuse.tar:opensuse:leap as
+// the source of an image.
+// To do for both the NewSourceFromFile and NewSourceFromStream functions
+
+// NewSourceFromFile returns a tarfile.Source for the specified path.
+// Deprecated: Please use NewSourceFromFileWithContext which allows you to configure temp directory
+// for big files through SystemContext.BigFilesTemporaryDir
+func NewSourceFromFile(path string) (*Source, error) {
+	return NewSourceFromFileWithContext(nil, path)
+}
+
+// NewSourceFromFileWithContext returns a tarfile.Source for the specified path.
+func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Source, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return nil, errors.Wrapf(err, "error opening file %q", path)
+	}
+	defer file.Close()
+
+	// If the file is already not compressed we can just return the file itself
+	// as a source. Otherwise we pass the stream to NewSourceFromStream.
+	stream, isCompressed, err := compression.AutoDecompress(file)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Error detecting compression for file %q", path)
+	}
+	defer stream.Close()
+	if !isCompressed {
+		return &Source{
+			tarPath: path,
+		}, nil
+	}
+	return NewSourceFromStreamWithSystemContext(sys, stream)
+}
+
+// NewSourceFromStream returns a tarfile.Source for the specified inputStream,
+// which can be either compressed or uncompressed. The caller can close the
+// inputStream immediately after NewSourceFromStream returns.
+// Deprecated: Please use NewSourceFromStreamWithSystemContext which allows you to configure
+// temp directory for big files through SystemContext.BigFilesTemporaryDir
+func NewSourceFromStream(inputStream io.Reader) (*Source, error) {
+	return NewSourceFromStreamWithSystemContext(nil, inputStream)
+}
+
+// NewSourceFromStreamWithSystemContext returns a tarfile.Source for the specified inputStream,
+// which can be either compressed or uncompressed. The caller can close the
+// inputStream immediately after NewSourceFromStreamWithSystemContext returns.
+func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream io.Reader) (*Source, error) {
+	// Save inputStream to a temporary file
+	tarCopyFile, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(sys), "docker-tar")
+	if err != nil {
+		return nil, errors.Wrap(err, "error creating temporary file")
+	}
+	defer tarCopyFile.Close()
+
+	succeeded := false
+	defer func() {
+		if !succeeded {
+			os.Remove(tarCopyFile.Name())
+		}
+	}()
+
+	// In order to be compatible with docker-load, we need to support
+	// auto-decompression (it's also a nice quality-of-life thing to avoid
+	// giving users really confusing "invalid tar header" errors).
+	uncompressedStream, _, err := compression.AutoDecompress(inputStream)
+	if err != nil {
+		return nil, errors.Wrap(err, "Error auto-decompressing input")
+	}
+	defer uncompressedStream.Close()
+
+	// Copy the plain archive to the temporary file.
+	//
+	// TODO: This can take quite some time, and should ideally be cancellable
+	// using a context.Context.
+	if _, err := io.Copy(tarCopyFile, uncompressedStream); err != nil {
+		return nil, errors.Wrapf(err, "error copying contents to temporary file %q", tarCopyFile.Name())
+	}
+	succeeded = true
+
+	return &Source{
+		tarPath:              tarCopyFile.Name(),
+		removeTarPathOnClose: true,
+	}, nil
+}
+
+// tarReadCloser is a way to close the backing file of a tar.Reader when the user no longer needs the tar component.
+type tarReadCloser struct { + *tar.Reader + backingFile *os.File +} + +func (t *tarReadCloser) Close() error { + return t.backingFile.Close() +} + +// openTarComponent returns a ReadCloser for the specific file within the archive. +// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers), +// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough. +// The caller should call .Close() on the returned stream. +func (s *Source) openTarComponent(componentPath string) (io.ReadCloser, error) { + f, err := os.Open(s.tarPath) + if err != nil { + return nil, err + } + succeeded := false + defer func() { + if !succeeded { + f.Close() + } + }() + + tarReader, header, err := findTarComponent(f, componentPath) + if err != nil { + return nil, err + } + if header == nil { + return nil, os.ErrNotExist + } + if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested + // We follow only one symlink; so no loops are possible. + if _, err := f.Seek(0, io.SeekStart); err != nil { + return nil, err + } + // The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive, + // so we don't care. + tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname)) + if err != nil { + return nil, err + } + if header == nil { + return nil, os.ErrNotExist + } + } + + if !header.FileInfo().Mode().IsRegular() { + return nil, errors.Errorf("Error reading tar archive component %s: not a regular file", header.Name) + } + succeeded = true + return &tarReadCloser{Reader: tarReader, backingFile: f}, nil +} + +// findTarComponent returns a header and a reader matching componentPath within inputFile, +// or (nil, nil, nil) if not found. 
+func findTarComponent(inputFile io.Reader, componentPath string) (*tar.Reader, *tar.Header, error) { + t := tar.NewReader(inputFile) + componentPath = path.Clean(componentPath) + for { + h, err := t.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, nil, err + } + if path.Clean(h.Name) == componentPath { + return t, h, nil + } + } + return nil, nil, nil +} + +// readTarComponent returns full contents of componentPath. +func (s *Source) readTarComponent(path string, limit int) ([]byte, error) { + file, err := s.openTarComponent(path) + if err != nil { + return nil, errors.Wrapf(err, "Error loading tar component %s", path) + } + defer file.Close() + bytes, err := iolimits.ReadAtMost(file, limit) + if err != nil { + return nil, err + } + return bytes, nil +} + +// ensureCachedDataIsPresent loads data necessary for any of the public accessors. +// It is safe to call this from multi-threaded code. +func (s *Source) ensureCachedDataIsPresent() error { + s.cacheDataLock.Do(func() { + s.cacheDataResult = s.ensureCachedDataIsPresentPrivate() + }) + return s.cacheDataResult +} + +// ensureCachedDataIsPresentPrivate is a private implementation detail of ensureCachedDataIsPresent. +// Call ensureCachedDataIsPresent instead. +func (s *Source) ensureCachedDataIsPresentPrivate() error { + // Read and parse manifest.json + tarManifest, err := s.loadTarManifest() + if err != nil { + return err + } + + // Check to make sure length is 1 + if len(tarManifest) != 1 { + return errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(tarManifest)) + } + + // Read and parse config. + configBytes, err := s.readTarComponent(tarManifest[0].Config, iolimits.MaxConfigBodySize) + if err != nil { + return err + } + var parsedConfig manifest.Schema2Image // There's a lot of info there, but we only really care about layer DiffIDs. 
+ if err := json.Unmarshal(configBytes, &parsedConfig); err != nil { + return errors.Wrapf(err, "Error decoding tar config %s", tarManifest[0].Config) + } + if parsedConfig.RootFS == nil { + return errors.Errorf("Invalid image config (rootFS is not set): %s", tarManifest[0].Config) + } + + knownLayers, err := s.prepareLayerData(&tarManifest[0], &parsedConfig) + if err != nil { + return err + } + + // Success; commit. + s.tarManifest = &tarManifest[0] + s.configBytes = configBytes + s.configDigest = digest.FromBytes(configBytes) + s.orderedDiffIDList = parsedConfig.RootFS.DiffIDs + s.knownLayers = knownLayers + return nil +} + +// loadTarManifest loads and decodes the manifest.json. +func (s *Source) loadTarManifest() ([]ManifestItem, error) { + // FIXME? Do we need to deal with the legacy format? + bytes, err := s.readTarComponent(manifestFileName, iolimits.MaxTarFileManifestSize) + if err != nil { + return nil, err + } + var items []ManifestItem + if err := json.Unmarshal(bytes, &items); err != nil { + return nil, errors.Wrap(err, "Error decoding tar manifest.json") + } + return items, nil +} + +// Close removes resources associated with an initialized Source, if any. +func (s *Source) Close() error { + if s.removeTarPathOnClose { + return os.Remove(s.tarPath) + } + return nil +} + +// LoadTarManifest loads and decodes the manifest.json +func (s *Source) LoadTarManifest() ([]ManifestItem, error) { + return s.loadTarManifest() +} + +func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manifest.Schema2Image) (map[digest.Digest]*layerInfo, error) { + // Collect layer data available in manifest and config. 
+ if len(tarManifest.Layers) != len(parsedConfig.RootFS.DiffIDs) { + return nil, errors.Errorf("Inconsistent layer count: %d in manifest, %d in config", len(tarManifest.Layers), len(parsedConfig.RootFS.DiffIDs)) + } + knownLayers := map[digest.Digest]*layerInfo{} + unknownLayerSizes := map[string]*layerInfo{} // Points into knownLayers, a "to do list" of items with unknown sizes. + for i, diffID := range parsedConfig.RootFS.DiffIDs { + if _, ok := knownLayers[diffID]; ok { + // Apparently it really can happen that a single image contains the same layer diff more than once. + // In that case, the diffID validation ensures that both layers truly are the same, and it should not matter + // which of the tarManifest.Layers paths is used; (docker save) actually makes the duplicates symlinks to the original. + continue + } + layerPath := path.Clean(tarManifest.Layers[i]) + if _, ok := unknownLayerSizes[layerPath]; ok { + return nil, errors.Errorf("Layer tarfile %s used for two different DiffID values", layerPath) + } + li := &layerInfo{ // A new element in each iteration + path: layerPath, + size: -1, + } + knownLayers[diffID] = li + unknownLayerSizes[layerPath] = li + } + + // Scan the tar file to collect layer sizes. + file, err := os.Open(s.tarPath) + if err != nil { + return nil, err + } + defer file.Close() + t := tar.NewReader(file) + for { + h, err := t.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + layerPath := path.Clean(h.Name) + if li, ok := unknownLayerSizes[layerPath]; ok { + // Since GetBlob will decompress layers that are compressed we need + // to do the decompression here as well, otherwise we will + // incorrectly report the size. Pretty critical, since tools like + // umoci always compress layer blobs. Obviously we only bother with + // the slower method of checking if it's compressed. 
+ uncompressedStream, isCompressed, err := compression.AutoDecompress(t) + if err != nil { + return nil, errors.Wrapf(err, "Error auto-decompressing %s to determine its size", layerPath) + } + defer uncompressedStream.Close() + + uncompressedSize := h.Size + if isCompressed { + uncompressedSize, err = io.Copy(ioutil.Discard, uncompressedStream) + if err != nil { + return nil, errors.Wrapf(err, "Error reading %s to find its size", layerPath) + } + } + li.size = uncompressedSize + delete(unknownLayerSizes, layerPath) + } + } + if len(unknownLayerSizes) != 0 { + return nil, errors.Errorf("Some layer tarfiles are missing in the tarball") // This could do with a better error reporting, if this ever happened in practice. + } + + return knownLayers, nil +} + +// GetManifest returns the image's manifest along with its MIME type (which may be empty when it can't be determined but the manifest is available). +// It may use a remote (= slow) service. +// If instanceDigest is not nil, it contains a digest of the specific manifest instance to retrieve (when the primary manifest is a manifest list); +// this never happens if the primary manifest is not a manifest list (e.g. if the source never returns manifest lists). +// This source implementation does not support manifest lists, so the passed-in instanceDigest should always be nil, +// as the primary manifest can not be a list, so there can be no secondary instances. +func (s *Source) GetManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + if instanceDigest != nil { + // How did we even get here? GetManifest(ctx, nil) has returned a manifest.DockerV2Schema2MediaType. 
+ return nil, "", errors.New(`Manifest lists are not supported by "docker-daemon:"`) + } + if s.generatedManifest == nil { + if err := s.ensureCachedDataIsPresent(); err != nil { + return nil, "", err + } + m := manifest.Schema2{ + SchemaVersion: 2, + MediaType: manifest.DockerV2Schema2MediaType, + ConfigDescriptor: manifest.Schema2Descriptor{ + MediaType: manifest.DockerV2Schema2ConfigMediaType, + Size: int64(len(s.configBytes)), + Digest: s.configDigest, + }, + LayersDescriptors: []manifest.Schema2Descriptor{}, + } + for _, diffID := range s.orderedDiffIDList { + li, ok := s.knownLayers[diffID] + if !ok { + return nil, "", errors.Errorf("Internal inconsistency: Information about layer %s missing", diffID) + } + m.LayersDescriptors = append(m.LayersDescriptors, manifest.Schema2Descriptor{ + Digest: diffID, // diffID is a digest of the uncompressed tarball + MediaType: manifest.DockerV2Schema2LayerMediaType, + Size: li.size, + }) + } + manifestBytes, err := json.Marshal(&m) + if err != nil { + return nil, "", err + } + s.generatedManifest = manifestBytes + } + return s.generatedManifest, manifest.DockerV2Schema2MediaType, nil +} + +// uncompressedReadCloser is an io.ReadCloser that closes both the uncompressed stream and the underlying input. +type uncompressedReadCloser struct { + io.Reader + underlyingCloser func() error + uncompressedCloser func() error +} + +func (r uncompressedReadCloser) Close() error { + var res error + if err := r.uncompressedCloser(); err != nil { + res = err + } + if err := r.underlyingCloser(); err != nil && res == nil { + res = err + } + return res +} + +// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently. +func (s *Source) HasThreadSafeGetBlob() bool { + return true +} + +// GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown). +// The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided. 
+// May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location. +func (s *Source) GetBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache) (io.ReadCloser, int64, error) { + if err := s.ensureCachedDataIsPresent(); err != nil { + return nil, 0, err + } + + if info.Digest == s.configDigest { // FIXME? Implement a more general algorithm matching instead of assuming sha256. + return ioutil.NopCloser(bytes.NewReader(s.configBytes)), int64(len(s.configBytes)), nil + } + + if li, ok := s.knownLayers[info.Digest]; ok { // diffID is a digest of the uncompressed tarball, + underlyingStream, err := s.openTarComponent(li.path) + if err != nil { + return nil, 0, err + } + closeUnderlyingStream := true + defer func() { + if closeUnderlyingStream { + underlyingStream.Close() + } + }() + + // In order to handle the fact that digests != diffIDs (and thus that a + // caller which is trying to verify the blob will run into problems), + // we need to decompress blobs. This is a bit ugly, but it's a + // consequence of making everything addressable by their DiffID rather + // than by their digest... + // + // In particular, because the v2s2 manifest being generated uses + // DiffIDs, any caller of GetBlob is going to be asking for DiffIDs of + // layers not their _actual_ digest. The result is that copy/... will + // be verifing a "digest" which is not the actual layer's digest (but + // is instead the DiffID). 
+ + uncompressedStream, _, err := compression.AutoDecompress(underlyingStream) + if err != nil { + return nil, 0, errors.Wrapf(err, "Error auto-decompressing blob %s", info.Digest) + } + + newStream := uncompressedReadCloser{ + Reader: uncompressedStream, + underlyingCloser: underlyingStream.Close, + uncompressedCloser: uncompressedStream.Close, + } + closeUnderlyingStream = false + + return newStream, li.size, nil + } + + return nil, 0, errors.Errorf("Unknown blob %s", info.Digest) +} + +// GetSignatures returns the image's signatures. It may use a remote (= slow) service. +// This source implementation does not support manifest lists, so the passed-in instanceDigest should always be nil, +// as there can be no secondary manifests. +func (s *Source) GetSignatures(ctx context.Context, instanceDigest *digest.Digest) ([][]byte, error) { + if instanceDigest != nil { + // How did we even get here? GetManifest(ctx, nil) has returned a manifest.DockerV2Schema2MediaType. + return nil, errors.Errorf(`Manifest lists are not supported by "docker-daemon:"`) + } + return [][]byte{}, nil +} + +// LayerInfosForCopy returns either nil (meaning the values in the manifest are fine), or updated values for the layer +// blobsums that are listed in the image's manifest. If values are returned, they should be used when using GetBlob() +// to read the image's layers. +// This source implementation does not support manifest lists, so the passed-in instanceDigest should always be nil, +// as the primary manifest can not be a list, so there can be no secondary manifests. +// The Digest field is guaranteed to be provided; Size may be -1. +// WARNING: The list may contain duplicates, and they are semantically relevant. 
+func (s *Source) LayerInfosForCopy(context.Context, *digest.Digest) ([]types.BlobInfo, error) { + return nil, nil +} diff --git a/docker/internal/tarfile/types.go b/docker/internal/tarfile/types.go new file mode 100644 index 0000000000..0a5103a95c --- /dev/null +++ b/docker/internal/tarfile/types.go @@ -0,0 +1,24 @@ +package tarfile + +import ( + "github.com/containers/image/v5/manifest" + "github.com/opencontainers/go-digest" +) + +// Various data structures. + +// Based on github.com/docker/docker/image/tarexport/tarexport.go +const ( + manifestFileName = "manifest.json" +) + +// ManifestItem is an element of the array stored in the top-level manifest.json file. +type ManifestItem struct { // NOTE: This is visible as docker/tarfile.ManifestItem, and a part of the stable API. + Config string + RepoTags []string + Layers []string + Parent imageID `json:",omitempty"` + LayerSources map[digest.Digest]manifest.Schema2Descriptor `json:",omitempty"` +} + +type imageID string diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index 77f4b0a862..0b313dff0d 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -1,51 +1,20 @@ package tarfile import ( - "archive/tar" - "bytes" "context" - "encoding/json" "io" - "io/ioutil" - "os" - "path" - "sync" - "github.com/containers/image/v5/internal/iolimits" - "github.com/containers/image/v5/internal/tmpdir" - "github.com/containers/image/v5/manifest" - "github.com/containers/image/v5/pkg/compression" + internal "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/types" digest "github.com/opencontainers/go-digest" - "github.com/pkg/errors" ) // Source is a partial implementation of types.ImageSource for reading from tarPath. +// Most users should use this via implementations of ImageReference form docker/archive or docker/daemon. 
type Source struct { - tarPath string - removeTarPathOnClose bool // Remove temp file on close if true - // The following data is only available after ensureCachedDataIsPresent() succeeds - tarManifest *ManifestItem // nil if not available yet. - configBytes []byte - configDigest digest.Digest - orderedDiffIDList []digest.Digest - knownLayers map[digest.Digest]*layerInfo - // Other state - generatedManifest []byte // Private cache for GetManifest(), nil if not set yet. - cacheDataLock sync.Once // Private state for ensureCachedDataIsPresent to make it concurrency-safe - cacheDataResult error // Private state for ensureCachedDataIsPresent + internal *internal.Source } -type layerInfo struct { - path string - size int64 -} - -// TODO: We could add support for multiple images in a single archive, so -// that people could use docker-archive:opensuse.tar:opensuse:leap as -// the source of an image. -// To do for both the NewSourceFromFile and NewSourceFromStream functions - // NewSourceFromFile returns a tarfile.Source for the specified path. // Deprecated: Please use NewSourceFromFileWithContext which will allows you to configure temp directory // for big files through SystemContext.BigFilesTemporaryDir @@ -55,25 +24,11 @@ func NewSourceFromFile(path string) (*Source, error) { // NewSourceFromFileWithContext returns a tarfile.Source for the specified path. func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Source, error) { - file, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "error opening file %q", path) - } - defer file.Close() - - // If the file is already not compressed we can just return the file itself - // as a source. Otherwise we pass the stream to NewSourceFromStream. 
- stream, isCompressed, err := compression.AutoDecompress(file) + src, err := internal.NewSourceFromFileWithContext(sys, path) if err != nil { - return nil, errors.Wrapf(err, "Error detecting compression for file %q", path) - } - defer stream.Close() - if !isCompressed { - return &Source{ - tarPath: path, - }, nil + return nil, err } - return NewSourceFromStreamWithSystemContext(sys, stream) + return &Source{internal: src}, nil } // NewSourceFromStream returns a tarfile.Source for the specified inputStream, @@ -89,281 +44,21 @@ func NewSourceFromStream(inputStream io.Reader) (*Source, error) { // which can be either compressed or uncompressed. The caller can close the // inputStream immediately after NewSourceFromFile returns. func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { - // Save inputStream to a temporary file - tarCopyFile, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(sys), "docker-tar") - if err != nil { - return nil, errors.Wrap(err, "error creating temporary file") - } - defer tarCopyFile.Close() - - succeeded := false - defer func() { - if !succeeded { - os.Remove(tarCopyFile.Name()) - } - }() - - // In order to be compatible with docker-load, we need to support - // auto-decompression (it's also a nice quality-of-life thing to avoid - // giving users really confusing "invalid tar header" errors). - uncompressedStream, _, err := compression.AutoDecompress(inputStream) - if err != nil { - return nil, errors.Wrap(err, "Error auto-decompressing input") - } - defer uncompressedStream.Close() - - // Copy the plain archive to the temporary file. - // - // TODO: This can take quite some time, and should ideally be cancellable - // using a context.Context. 
- if _, err := io.Copy(tarCopyFile, uncompressedStream); err != nil { - return nil, errors.Wrapf(err, "error copying contents to temporary file %q", tarCopyFile.Name()) - } - succeeded = true - - return &Source{ - tarPath: tarCopyFile.Name(), - removeTarPathOnClose: true, - }, nil -} - -// tarReadCloser is a way to close the backing file of a tar.Reader when the user no longer needs the tar component. -type tarReadCloser struct { - *tar.Reader - backingFile *os.File -} - -func (t *tarReadCloser) Close() error { - return t.backingFile.Close() -} - -// openTarComponent returns a ReadCloser for the specific file within the archive. -// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers), -// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough. -// The caller should call .Close() on the returned stream. -func (s *Source) openTarComponent(componentPath string) (io.ReadCloser, error) { - f, err := os.Open(s.tarPath) - if err != nil { - return nil, err - } - succeeded := false - defer func() { - if !succeeded { - f.Close() - } - }() - - tarReader, header, err := findTarComponent(f, componentPath) - if err != nil { - return nil, err - } - if header == nil { - return nil, os.ErrNotExist - } - if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested - // We follow only one symlink; so no loops are possible. - if _, err := f.Seek(0, io.SeekStart); err != nil { - return nil, err - } - // The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive, - // so we don't care. 
- tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname)) - if err != nil { - return nil, err - } - if header == nil { - return nil, os.ErrNotExist - } - } - - if !header.FileInfo().Mode().IsRegular() { - return nil, errors.Errorf("Error reading tar archive component %s: not a regular file", header.Name) - } - succeeded = true - return &tarReadCloser{Reader: tarReader, backingFile: f}, nil -} - -// findTarComponent returns a header and a reader matching componentPath within inputFile, -// or (nil, nil, nil) if not found. -func findTarComponent(inputFile io.Reader, componentPath string) (*tar.Reader, *tar.Header, error) { - t := tar.NewReader(inputFile) - componentPath = path.Clean(componentPath) - for { - h, err := t.Next() - if err == io.EOF { - break - } - if err != nil { - return nil, nil, err - } - if path.Clean(h.Name) == componentPath { - return t, h, nil - } - } - return nil, nil, nil -} - -// readTarComponent returns full contents of componentPath. -func (s *Source) readTarComponent(path string, limit int) ([]byte, error) { - file, err := s.openTarComponent(path) - if err != nil { - return nil, errors.Wrapf(err, "Error loading tar component %s", path) - } - defer file.Close() - bytes, err := iolimits.ReadAtMost(file, limit) - if err != nil { - return nil, err - } - return bytes, nil -} - -// ensureCachedDataIsPresent loads data necessary for any of the public accessors. -// It is safe to call this from multi-threaded code. -func (s *Source) ensureCachedDataIsPresent() error { - s.cacheDataLock.Do(func() { - s.cacheDataResult = s.ensureCachedDataIsPresentPrivate() - }) - return s.cacheDataResult -} - -// ensureCachedDataIsPresentPrivate is a private implementation detail of ensureCachedDataIsPresent. -// Call ensureCachedDataIsPresent instead. 
-func (s *Source) ensureCachedDataIsPresentPrivate() error { - // Read and parse manifest.json - tarManifest, err := s.loadTarManifest() - if err != nil { - return err - } - - // Check to make sure length is 1 - if len(tarManifest) != 1 { - return errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(tarManifest)) - } - - // Read and parse config. - configBytes, err := s.readTarComponent(tarManifest[0].Config, iolimits.MaxConfigBodySize) - if err != nil { - return err - } - var parsedConfig manifest.Schema2Image // There's a lot of info there, but we only really care about layer DiffIDs. - if err := json.Unmarshal(configBytes, &parsedConfig); err != nil { - return errors.Wrapf(err, "Error decoding tar config %s", tarManifest[0].Config) - } - if parsedConfig.RootFS == nil { - return errors.Errorf("Invalid image config (rootFS is not set): %s", tarManifest[0].Config) - } - - knownLayers, err := s.prepareLayerData(&tarManifest[0], &parsedConfig) - if err != nil { - return err - } - - // Success; commit. - s.tarManifest = &tarManifest[0] - s.configBytes = configBytes - s.configDigest = digest.FromBytes(configBytes) - s.orderedDiffIDList = parsedConfig.RootFS.DiffIDs - s.knownLayers = knownLayers - return nil -} - -// loadTarManifest loads and decodes the manifest.json. -func (s *Source) loadTarManifest() ([]ManifestItem, error) { - // FIXME? Do we need to deal with the legacy format? - bytes, err := s.readTarComponent(manifestFileName, iolimits.MaxTarFileManifestSize) + src, err := internal.NewSourceFromStreamWithSystemContext(sys, inputStream) if err != nil { return nil, err } - var items []ManifestItem - if err := json.Unmarshal(bytes, &items); err != nil { - return nil, errors.Wrap(err, "Error decoding tar manifest.json") - } - return items, nil + return &Source{internal: src}, nil } // Close removes resources associated with an initialized Source, if any. 
func (s *Source) Close() error { - if s.removeTarPathOnClose { - return os.Remove(s.tarPath) - } - return nil + return s.internal.Close() } // LoadTarManifest loads and decodes the manifest.json func (s *Source) LoadTarManifest() ([]ManifestItem, error) { - return s.loadTarManifest() -} - -func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manifest.Schema2Image) (map[digest.Digest]*layerInfo, error) { - // Collect layer data available in manifest and config. - if len(tarManifest.Layers) != len(parsedConfig.RootFS.DiffIDs) { - return nil, errors.Errorf("Inconsistent layer count: %d in manifest, %d in config", len(tarManifest.Layers), len(parsedConfig.RootFS.DiffIDs)) - } - knownLayers := map[digest.Digest]*layerInfo{} - unknownLayerSizes := map[string]*layerInfo{} // Points into knownLayers, a "to do list" of items with unknown sizes. - for i, diffID := range parsedConfig.RootFS.DiffIDs { - if _, ok := knownLayers[diffID]; ok { - // Apparently it really can happen that a single image contains the same layer diff more than once. - // In that case, the diffID validation ensures that both layers truly are the same, and it should not matter - // which of the tarManifest.Layers paths is used; (docker save) actually makes the duplicates symlinks to the original. - continue - } - layerPath := path.Clean(tarManifest.Layers[i]) - if _, ok := unknownLayerSizes[layerPath]; ok { - return nil, errors.Errorf("Layer tarfile %s used for two different DiffID values", layerPath) - } - li := &layerInfo{ // A new element in each iteration - path: layerPath, - size: -1, - } - knownLayers[diffID] = li - unknownLayerSizes[layerPath] = li - } - - // Scan the tar file to collect layer sizes. 
- file, err := os.Open(s.tarPath) - if err != nil { - return nil, err - } - defer file.Close() - t := tar.NewReader(file) - for { - h, err := t.Next() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - layerPath := path.Clean(h.Name) - if li, ok := unknownLayerSizes[layerPath]; ok { - // Since GetBlob will decompress layers that are compressed we need - // to do the decompression here as well, otherwise we will - // incorrectly report the size. Pretty critical, since tools like - // umoci always compress layer blobs. Obviously we only bother with - // the slower method of checking if it's compressed. - uncompressedStream, isCompressed, err := compression.AutoDecompress(t) - if err != nil { - return nil, errors.Wrapf(err, "Error auto-decompressing %s to determine its size", layerPath) - } - defer uncompressedStream.Close() - - uncompressedSize := h.Size - if isCompressed { - uncompressedSize, err = io.Copy(ioutil.Discard, uncompressedStream) - if err != nil { - return nil, errors.Wrapf(err, "Error reading %s to find its size", layerPath) - } - } - li.size = uncompressedSize - delete(unknownLayerSizes, layerPath) - } - } - if len(unknownLayerSizes) != 0 { - return nil, errors.Errorf("Some layer tarfiles are missing in the tarball") // This could do with a better error reporting, if this ever happened in practice. - } - - return knownLayers, nil + return s.internal.LoadTarManifest() } // GetManifest returns the image's manifest along with its MIME type (which may be empty when it can't be determined but the manifest is available). @@ -373,130 +68,26 @@ func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manif // This source implementation does not support manifest lists, so the passed-in instanceDigest should always be nil, // as the primary manifest can not be a list, so there can be no secondary instances. 
func (s *Source) GetManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { - if instanceDigest != nil { - // How did we even get here? GetManifest(ctx, nil) has returned a manifest.DockerV2Schema2MediaType. - return nil, "", errors.New(`Manifest lists are not supported by "docker-daemon:"`) - } - if s.generatedManifest == nil { - if err := s.ensureCachedDataIsPresent(); err != nil { - return nil, "", err - } - m := manifest.Schema2{ - SchemaVersion: 2, - MediaType: manifest.DockerV2Schema2MediaType, - ConfigDescriptor: manifest.Schema2Descriptor{ - MediaType: manifest.DockerV2Schema2ConfigMediaType, - Size: int64(len(s.configBytes)), - Digest: s.configDigest, - }, - LayersDescriptors: []manifest.Schema2Descriptor{}, - } - for _, diffID := range s.orderedDiffIDList { - li, ok := s.knownLayers[diffID] - if !ok { - return nil, "", errors.Errorf("Internal inconsistency: Information about layer %s missing", diffID) - } - m.LayersDescriptors = append(m.LayersDescriptors, manifest.Schema2Descriptor{ - Digest: diffID, // diffID is a digest of the uncompressed tarball - MediaType: manifest.DockerV2Schema2LayerMediaType, - Size: li.size, - }) - } - manifestBytes, err := json.Marshal(&m) - if err != nil { - return nil, "", err - } - s.generatedManifest = manifestBytes - } - return s.generatedManifest, manifest.DockerV2Schema2MediaType, nil -} - -// uncompressedReadCloser is an io.ReadCloser that closes both the uncompressed stream and the underlying input. -type uncompressedReadCloser struct { - io.Reader - underlyingCloser func() error - uncompressedCloser func() error -} - -func (r uncompressedReadCloser) Close() error { - var res error - if err := r.uncompressedCloser(); err != nil { - res = err - } - if err := r.underlyingCloser(); err != nil && res == nil { - res = err - } - return res + return s.internal.GetManifest(ctx, instanceDigest) } // HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently. 
func (s *Source) HasThreadSafeGetBlob() bool { - return true + return s.internal.HasThreadSafeGetBlob() } // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown). // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided. // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location. func (s *Source) GetBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache) (io.ReadCloser, int64, error) { - if err := s.ensureCachedDataIsPresent(); err != nil { - return nil, 0, err - } - - if info.Digest == s.configDigest { // FIXME? Implement a more general algorithm matching instead of assuming sha256. - return ioutil.NopCloser(bytes.NewReader(s.configBytes)), int64(len(s.configBytes)), nil - } - - if li, ok := s.knownLayers[info.Digest]; ok { // diffID is a digest of the uncompressed tarball, - underlyingStream, err := s.openTarComponent(li.path) - if err != nil { - return nil, 0, err - } - closeUnderlyingStream := true - defer func() { - if closeUnderlyingStream { - underlyingStream.Close() - } - }() - - // In order to handle the fact that digests != diffIDs (and thus that a - // caller which is trying to verify the blob will run into problems), - // we need to decompress blobs. This is a bit ugly, but it's a - // consequence of making everything addressable by their DiffID rather - // than by their digest... - // - // In particular, because the v2s2 manifest being generated uses - // DiffIDs, any caller of GetBlob is going to be asking for DiffIDs of - // layers not their _actual_ digest. The result is that copy/... will - // be verifing a "digest" which is not the actual layer's digest (but - // is instead the DiffID). 
- - uncompressedStream, _, err := compression.AutoDecompress(underlyingStream) - if err != nil { - return nil, 0, errors.Wrapf(err, "Error auto-decompressing blob %s", info.Digest) - } - - newStream := uncompressedReadCloser{ - Reader: uncompressedStream, - underlyingCloser: underlyingStream.Close, - uncompressedCloser: uncompressedStream.Close, - } - closeUnderlyingStream = false - - return newStream, li.size, nil - } - - return nil, 0, errors.Errorf("Unknown blob %s", info.Digest) + return s.internal.GetBlob(ctx, info, cache) } // GetSignatures returns the image's signatures. It may use a remote (= slow) service. // This source implementation does not support manifest lists, so the passed-in instanceDigest should always be nil, // as there can be no secondary manifests. func (s *Source) GetSignatures(ctx context.Context, instanceDigest *digest.Digest) ([][]byte, error) { - if instanceDigest != nil { - // How did we even get here? GetManifest(ctx, nil) has returned a manifest.DockerV2Schema2MediaType. - return nil, errors.Errorf(`Manifest lists are not supported by "docker-daemon:"`) - } - return [][]byte{}, nil + return s.internal.GetSignatures(ctx, instanceDigest) } // LayerInfosForCopy returns either nil (meaning the values in the manifest are fine), or updated values for the layer @@ -506,6 +97,6 @@ func (s *Source) GetSignatures(ctx context.Context, instanceDigest *digest.Diges // as the primary manifest can not be a list, so there can be no secondary manifests. // The Digest field is guaranteed to be provided; Size may be -1. // WARNING: The list may contain duplicates, and they are semantically relevant. 
-func (s *Source) LayerInfosForCopy(context.Context, *digest.Digest) ([]types.BlobInfo, error) { - return nil, nil +func (s *Source) LayerInfosForCopy(ctx context.Context, instanceDigest *digest.Digest) ([]types.BlobInfo, error) { + return s.internal.LayerInfosForCopy(ctx, instanceDigest) } diff --git a/docker/tarfile/types.go b/docker/tarfile/types.go index ac222528a0..d6c0a84250 100644 --- a/docker/tarfile/types.go +++ b/docker/tarfile/types.go @@ -1,8 +1,7 @@ package tarfile import ( - "github.com/containers/image/v5/manifest" - "github.com/opencontainers/go-digest" + internal "github.com/containers/image/v5/docker/internal/tarfile" ) // Various data structures. @@ -17,12 +16,4 @@ const ( ) // ManifestItem is an element of the array stored in the top-level manifest.json file. -type ManifestItem struct { - Config string - RepoTags []string - Layers []string - Parent imageID `json:",omitempty"` - LayerSources map[digest.Digest]manifest.Schema2Descriptor `json:",omitempty"` -} - -type imageID string +type ManifestItem = internal.ManifestItem // All public members from the internal package remain accessible. From d0f56fb460a7a1900ba130a4e6a3ee0728441baf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 19:24:00 +0200 Subject: [PATCH 24/35] Use the docker/internal/tarfile.Source from docker/daemon and docker/archive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... instead of the forwarding shims. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/archive/src.go | 2 +- docker/daemon/daemon_src.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/archive/src.go b/docker/archive/src.go index 6a628508d3..36612326d0 100644 --- a/docker/archive/src.go +++ b/docker/archive/src.go @@ -3,7 +3,7 @@ package archive import ( "context" - "github.com/containers/image/v5/docker/tarfile" + "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/types" "github.com/sirupsen/logrus" ) diff --git a/docker/daemon/daemon_src.go b/docker/daemon/daemon_src.go index 1827f811dc..f7582db104 100644 --- a/docker/daemon/daemon_src.go +++ b/docker/daemon/daemon_src.go @@ -3,7 +3,7 @@ package daemon import ( "context" - "github.com/containers/image/v5/docker/tarfile" + "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/types" "github.com/pkg/errors" ) From 4859364045f8e5ab23c56a118557b70fa0e07919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 19:26:58 +0200 Subject: [PATCH 25/35] Remove deprecated non-SystemContext functions from docker/internal/tarfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a private API now, so we can just drop it; rename the ...With[System]Context variants to use the shorter names, and update all callers. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/archive/src.go | 2 +- docker/daemon/daemon_src.go | 2 +- docker/internal/tarfile/src.go | 22 +++------------------- docker/tarfile/src.go | 4 ++-- 4 files changed, 7 insertions(+), 23 deletions(-) diff --git a/docker/archive/src.go b/docker/archive/src.go index 36612326d0..8915a8efc0 100644 --- a/docker/archive/src.go +++ b/docker/archive/src.go @@ -19,7 +19,7 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref archiveRe if ref.destinationRef != nil { logrus.Warnf("docker-archive: references are not supported for sources (ignoring)") } - src, err := tarfile.NewSourceFromFileWithContext(sys, ref.path) + src, err := tarfile.NewSourceFromFile(sys, ref.path) if err != nil { return nil, err } diff --git a/docker/daemon/daemon_src.go b/docker/daemon/daemon_src.go index f7582db104..dc0a91e5e1 100644 --- a/docker/daemon/daemon_src.go +++ b/docker/daemon/daemon_src.go @@ -35,7 +35,7 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref daemonRef } defer inputStream.Close() - src, err := tarfile.NewSourceFromStreamWithSystemContext(sys, inputStream) + src, err := tarfile.NewSourceFromStream(sys, inputStream) if err != nil { return nil, err } diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go index 77f4b0a862..f155f2bceb 100644 --- a/docker/internal/tarfile/src.go +++ b/docker/internal/tarfile/src.go @@ -47,14 +47,7 @@ type layerInfo struct { // To do for both the NewSourceFromFile and NewSourceFromStream functions // NewSourceFromFile returns a tarfile.Source for the specified path. -// Deprecated: Please use NewSourceFromFileWithContext which will allows you to configure temp directory -// for big files through SystemContext.BigFilesTemporaryDir -func NewSourceFromFile(path string) (*Source, error) { - return NewSourceFromFileWithContext(nil, path) -} - -// NewSourceFromFileWithContext returns a tarfile.Source for the specified path. 
-func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Source, error) { +func NewSourceFromFile(sys *types.SystemContext, path string) (*Source, error) { file, err := os.Open(path) if err != nil { return nil, errors.Wrapf(err, "error opening file %q", path) @@ -73,22 +66,13 @@ func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Sourc tarPath: path, }, nil } - return NewSourceFromStreamWithSystemContext(sys, stream) + return NewSourceFromStream(sys, stream) } // NewSourceFromStream returns a tarfile.Source for the specified inputStream, // which can be either compressed or uncompressed. The caller can close the // inputStream immediately after NewSourceFromFile returns. -// Deprecated: Please use NewSourceFromStreamWithSystemContext which will allows you to configure -// temp directory for big files through SystemContext.BigFilesTemporaryDir -func NewSourceFromStream(inputStream io.Reader) (*Source, error) { - return NewSourceFromStreamWithSystemContext(nil, inputStream) -} - -// NewSourceFromStreamWithSystemContext returns a tarfile.Source for the specified inputStream, -// which can be either compressed or uncompressed. The caller can close the -// inputStream immediately after NewSourceFromFile returns. -func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { +func NewSourceFromStream(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { // Save inputStream to a temporary file tarCopyFile, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(sys), "docker-tar") if err != nil { diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index 0b313dff0d..a9c63c137a 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -24,7 +24,7 @@ func NewSourceFromFile(path string) (*Source, error) { // NewSourceFromFileWithContext returns a tarfile.Source for the specified path. 
func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Source, error) { - src, err := internal.NewSourceFromFileWithContext(sys, path) + src, err := internal.NewSourceFromFile(sys, path) if err != nil { return nil, err } @@ -44,7 +44,7 @@ func NewSourceFromStream(inputStream io.Reader) (*Source, error) { // which can be either compressed or uncompressed. The caller can close the // inputStream immediately after NewSourceFromFile returns. func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { - src, err := internal.NewSourceFromStreamWithSystemContext(sys, inputStream) + src, err := internal.NewSourceFromStream(sys, inputStream) if err != nil { return nil, err } From de0ae52706ed5206fa97178c29628a154c6ffa29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 20:24:08 +0200 Subject: [PATCH 26/35] Split docker/internal/tarfile.Reader from Source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... to eventually allow creating multiple Sources from a single Reader (= a temporary file containing a seekable/uncompressed copy of the archive). Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/reader.go | 186 ++++++++++++++++++++++++++++++ docker/internal/tarfile/src.go | 165 +++----------------------- 2 files changed, 201 insertions(+), 150 deletions(-) create mode 100644 docker/internal/tarfile/reader.go diff --git a/docker/internal/tarfile/reader.go b/docker/internal/tarfile/reader.go new file mode 100644 index 0000000000..d916b51eea --- /dev/null +++ b/docker/internal/tarfile/reader.go @@ -0,0 +1,186 @@ +package tarfile + +import ( + "archive/tar" + "io" + "io/ioutil" + "os" + "path" + + "github.com/containers/image/v5/internal/iolimits" + "github.com/containers/image/v5/internal/tmpdir" + "github.com/containers/image/v5/pkg/compression" + "github.com/containers/image/v5/types" + "github.com/pkg/errors" +) + +// Reader is a ((docker save)-formatted) tar archive that allows random access to any component. +type Reader struct { + path string + removeOnClose bool // Remove file on close if true +} + +// NewReaderFromFile returns a Reader for the specified path. +// The caller should call .Close() on the returned archive when done. +func NewReaderFromFile(sys *types.SystemContext, path string) (*Reader, error) { + file, err := os.Open(path) + if err != nil { + return nil, errors.Wrapf(err, "error opening file %q", path) + } + defer file.Close() + + // If the file is already not compressed we can just return the file itself + // as a source. Otherwise we pass the stream to NewReaderFromStream. + stream, isCompressed, err := compression.AutoDecompress(file) + if err != nil { + return nil, errors.Wrapf(err, "Error detecting compression for file %q", path) + } + defer stream.Close() + if !isCompressed { + return &Reader{ + path: path, + }, nil + } + return NewReaderFromStream(sys, stream) +} + +// NewReaderFromStream returns a Reader for the specified inputStream, +// which can be either compressed or uncompressed. 
The caller can close the +// inputStream immediately after NewReaderFromStream returns. +// The caller should call .Close() on the returned archive when done. +func NewReaderFromStream(sys *types.SystemContext, inputStream io.Reader) (*Reader, error) { + // Save inputStream to a temporary file + tarCopyFile, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(sys), "docker-tar") + if err != nil { + return nil, errors.Wrap(err, "error creating temporary file") + } + defer tarCopyFile.Close() + + succeeded := false + defer func() { + if !succeeded { + os.Remove(tarCopyFile.Name()) + } + }() + + // In order to be compatible with docker-load, we need to support + // auto-decompression (it's also a nice quality-of-life thing to avoid + // giving users really confusing "invalid tar header" errors). + uncompressedStream, _, err := compression.AutoDecompress(inputStream) + if err != nil { + return nil, errors.Wrap(err, "Error auto-decompressing input") + } + defer uncompressedStream.Close() + + // Copy the plain archive to the temporary file. + // + // TODO: This can take quite some time, and should ideally be cancellable + // using a context.Context. + if _, err := io.Copy(tarCopyFile, uncompressedStream); err != nil { + return nil, errors.Wrapf(err, "error copying contents to temporary file %q", tarCopyFile.Name()) + } + succeeded = true + + return &Reader{ + path: tarCopyFile.Name(), + removeOnClose: true, + }, nil +} + +// Close removes resources associated with an initialized Reader, if any. +func (r *Reader) Close() error { + if r.removeOnClose { + return os.Remove(r.path) + } + return nil +} + +// tarReadCloser is a way to close the backing file of a tar.Reader when the user no longer needs the tar component. +type tarReadCloser struct { + *tar.Reader + backingFile *os.File +} + +func (t *tarReadCloser) Close() error { + return t.backingFile.Close() +} + +// openTarComponent returns a ReadCloser for the specific file within the archive. 
+// This is a linear scan; we assume that the tar file will have a fairly small amount of files (~layers), +// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough. +// The caller should call .Close() on the returned stream. +func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) { + f, err := os.Open(r.path) + if err != nil { + return nil, err + } + succeeded := false + defer func() { + if !succeeded { + f.Close() + } + }() + + tarReader, header, err := findTarComponent(f, componentPath) + if err != nil { + return nil, err + } + if header == nil { + return nil, os.ErrNotExist + } + if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested + // We follow only one symlink; so no loops are possible. + if _, err := f.Seek(0, io.SeekStart); err != nil { + return nil, err + } + // The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive, + // so we don't care. + tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname)) + if err != nil { + return nil, err + } + if header == nil { + return nil, os.ErrNotExist + } + } + + if !header.FileInfo().Mode().IsRegular() { + return nil, errors.Errorf("Error reading tar archive component %s: not a regular file", header.Name) + } + succeeded = true + return &tarReadCloser{Reader: tarReader, backingFile: f}, nil +} + +// findTarComponent returns a header and a reader matching componentPath within inputFile, +// or (nil, nil, nil) if not found. 
+func findTarComponent(inputFile io.Reader, componentPath string) (*tar.Reader, *tar.Header, error) { + t := tar.NewReader(inputFile) + componentPath = path.Clean(componentPath) + for { + h, err := t.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, nil, err + } + if path.Clean(h.Name) == componentPath { + return t, h, nil + } + } + return nil, nil, nil +} + +// readTarComponent returns full contents of componentPath. +func (r *Reader) readTarComponent(path string, limit int) ([]byte, error) { + file, err := r.openTarComponent(path) + if err != nil { + return nil, errors.Wrapf(err, "Error loading tar component %s", path) + } + defer file.Close() + bytes, err := iolimits.ReadAtMost(file, limit) + if err != nil { + return nil, err + } + return bytes, nil +} diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go index f155f2bceb..017126ed38 100644 --- a/docker/internal/tarfile/src.go +++ b/docker/internal/tarfile/src.go @@ -12,7 +12,6 @@ import ( "sync" "github.com/containers/image/v5/internal/iolimits" - "github.com/containers/image/v5/internal/tmpdir" "github.com/containers/image/v5/manifest" "github.com/containers/image/v5/pkg/compression" "github.com/containers/image/v5/types" @@ -22,8 +21,7 @@ import ( // Source is a partial implementation of types.ImageSource for reading from tarPath. type Source struct { - tarPath string - removeTarPathOnClose bool // Remove temp file on close if true + archive *Reader // The following data is only available after ensureCachedDataIsPresent() succeeds tarManifest *ManifestItem // nil if not available yet. configBytes []byte @@ -48,159 +46,28 @@ type layerInfo struct { // NewSourceFromFile returns a tarfile.Source for the specified path. 
func NewSourceFromFile(sys *types.SystemContext, path string) (*Source, error) { - file, err := os.Open(path) + archive, err := NewReaderFromFile(sys, path) if err != nil { - return nil, errors.Wrapf(err, "error opening file %q", path) - } - defer file.Close() - - // If the file is already not compressed we can just return the file itself - // as a source. Otherwise we pass the stream to NewSourceFromStream. - stream, isCompressed, err := compression.AutoDecompress(file) - if err != nil { - return nil, errors.Wrapf(err, "Error detecting compression for file %q", path) - } - defer stream.Close() - if !isCompressed { - return &Source{ - tarPath: path, - }, nil + return nil, err } - return NewSourceFromStream(sys, stream) + return &Source{ + archive: archive, + }, nil } // NewSourceFromStream returns a tarfile.Source for the specified inputStream, // which can be either compressed or uncompressed. The caller can close the // inputStream immediately after NewSourceFromFile returns. func NewSourceFromStream(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { - // Save inputStream to a temporary file - tarCopyFile, err := ioutil.TempFile(tmpdir.TemporaryDirectoryForBigFiles(sys), "docker-tar") + archive, err := NewReaderFromStream(sys, inputStream) if err != nil { - return nil, errors.Wrap(err, "error creating temporary file") - } - defer tarCopyFile.Close() - - succeeded := false - defer func() { - if !succeeded { - os.Remove(tarCopyFile.Name()) - } - }() - - // In order to be compatible with docker-load, we need to support - // auto-decompression (it's also a nice quality-of-life thing to avoid - // giving users really confusing "invalid tar header" errors). - uncompressedStream, _, err := compression.AutoDecompress(inputStream) - if err != nil { - return nil, errors.Wrap(err, "Error auto-decompressing input") - } - defer uncompressedStream.Close() - - // Copy the plain archive to the temporary file. 
- // - // TODO: This can take quite some time, and should ideally be cancellable - // using a context.Context. - if _, err := io.Copy(tarCopyFile, uncompressedStream); err != nil { - return nil, errors.Wrapf(err, "error copying contents to temporary file %q", tarCopyFile.Name()) + return nil, err } - succeeded = true - return &Source{ - tarPath: tarCopyFile.Name(), - removeTarPathOnClose: true, + archive: archive, }, nil } -// tarReadCloser is a way to close the backing file of a tar.Reader when the user no longer needs the tar component. -type tarReadCloser struct { - *tar.Reader - backingFile *os.File -} - -func (t *tarReadCloser) Close() error { - return t.backingFile.Close() -} - -// openTarComponent returns a ReadCloser for the specific file within the archive. -// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers), -// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough. -// The caller should call .Close() on the returned stream. -func (s *Source) openTarComponent(componentPath string) (io.ReadCloser, error) { - f, err := os.Open(s.tarPath) - if err != nil { - return nil, err - } - succeeded := false - defer func() { - if !succeeded { - f.Close() - } - }() - - tarReader, header, err := findTarComponent(f, componentPath) - if err != nil { - return nil, err - } - if header == nil { - return nil, os.ErrNotExist - } - if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested - // We follow only one symlink; so no loops are possible. - if _, err := f.Seek(0, io.SeekStart); err != nil { - return nil, err - } - // The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive, - // so we don't care. 
- tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname)) - if err != nil { - return nil, err - } - if header == nil { - return nil, os.ErrNotExist - } - } - - if !header.FileInfo().Mode().IsRegular() { - return nil, errors.Errorf("Error reading tar archive component %s: not a regular file", header.Name) - } - succeeded = true - return &tarReadCloser{Reader: tarReader, backingFile: f}, nil -} - -// findTarComponent returns a header and a reader matching componentPath within inputFile, -// or (nil, nil, nil) if not found. -func findTarComponent(inputFile io.Reader, componentPath string) (*tar.Reader, *tar.Header, error) { - t := tar.NewReader(inputFile) - componentPath = path.Clean(componentPath) - for { - h, err := t.Next() - if err == io.EOF { - break - } - if err != nil { - return nil, nil, err - } - if path.Clean(h.Name) == componentPath { - return t, h, nil - } - } - return nil, nil, nil -} - -// readTarComponent returns full contents of componentPath. -func (s *Source) readTarComponent(path string, limit int) ([]byte, error) { - file, err := s.openTarComponent(path) - if err != nil { - return nil, errors.Wrapf(err, "Error loading tar component %s", path) - } - defer file.Close() - bytes, err := iolimits.ReadAtMost(file, limit) - if err != nil { - return nil, err - } - return bytes, nil -} - // ensureCachedDataIsPresent loads data necessary for any of the public accessors. // It is safe to call this from multi-threaded code. func (s *Source) ensureCachedDataIsPresent() error { @@ -225,7 +92,7 @@ func (s *Source) ensureCachedDataIsPresentPrivate() error { } // Read and parse config. 
- configBytes, err := s.readTarComponent(tarManifest[0].Config, iolimits.MaxConfigBodySize) + configBytes, err := s.archive.readTarComponent(tarManifest[0].Config, iolimits.MaxConfigBodySize) if err != nil { return err } @@ -254,7 +121,7 @@ func (s *Source) ensureCachedDataIsPresentPrivate() error { // loadTarManifest loads and decodes the manifest.json. func (s *Source) loadTarManifest() ([]ManifestItem, error) { // FIXME? Do we need to deal with the legacy format? - bytes, err := s.readTarComponent(manifestFileName, iolimits.MaxTarFileManifestSize) + bytes, err := s.archive.readTarComponent(manifestFileName, iolimits.MaxTarFileManifestSize) if err != nil { return nil, err } @@ -267,10 +134,7 @@ func (s *Source) loadTarManifest() ([]ManifestItem, error) { // Close removes resources associated with an initialized Source, if any. func (s *Source) Close() error { - if s.removeTarPathOnClose { - return os.Remove(s.tarPath) - } - return nil + return s.archive.Close() } // LoadTarManifest loads and decodes the manifest.json @@ -305,7 +169,7 @@ func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manif } // Scan the tar file to collect layer sizes. - file, err := os.Open(s.tarPath) + file, err := os.Open(s.archive.path) if err != nil { return nil, err } @@ -320,6 +184,7 @@ func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manif return nil, err } layerPath := path.Clean(h.Name) + // FIXME: Cache this data across images in Reader. 
if li, ok := unknownLayerSizes[layerPath]; ok { // Since GetBlob will decompress layers that are compressed we need // to do the decompression here as well, otherwise we will @@ -431,7 +296,7 @@ func (s *Source) GetBlob(ctx context.Context, info types.BlobInfo, cache types.B } if li, ok := s.knownLayers[info.Digest]; ok { // diffID is a digest of the uncompressed tarball, - underlyingStream, err := s.openTarComponent(li.path) + underlyingStream, err := s.archive.openTarComponent(li.path) if err != nil { return nil, 0, err } From a9767e75b0aec1154bfa32453fb592e2c7a493f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 20:38:57 +0200 Subject: [PATCH 27/35] Separate tarfile.Reader creation from Source creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least docker/archive will need to deal with the two optionally separately, so such a separate constructor must exist; and the other callers are not much more complex if they separate it as well. This will also allow us to add reference lookup without having to duplicate the API. As a concession to the simpler callers, add a closeArchive parameter (currently always true) that allows them not to worry about the lifetime of the tarfile.Reader. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- docker/archive/src.go | 3 ++- docker/daemon/daemon_src.go | 3 ++- docker/internal/tarfile/src.go | 33 +++++++++++---------------------- docker/tarfile/src.go | 6 ++++-- 4 files changed, 19 insertions(+), 26 deletions(-) diff --git a/docker/archive/src.go b/docker/archive/src.go index 8915a8efc0..cfeb6bee18 100644 --- a/docker/archive/src.go +++ b/docker/archive/src.go @@ -19,10 +19,11 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref archiveRe if ref.destinationRef != nil { logrus.Warnf("docker-archive: references are not supported for sources (ignoring)") } - src, err := tarfile.NewSourceFromFile(sys, ref.path) + archive, err := tarfile.NewReaderFromFile(sys, ref.path) if err != nil { return nil, err } + src := tarfile.NewSource(archive, true) return &archiveImageSource{ Source: src, ref: ref, diff --git a/docker/daemon/daemon_src.go b/docker/daemon/daemon_src.go index dc0a91e5e1..8cca3a893d 100644 --- a/docker/daemon/daemon_src.go +++ b/docker/daemon/daemon_src.go @@ -35,10 +35,11 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref daemonRef } defer inputStream.Close() - src, err := tarfile.NewSourceFromStream(sys, inputStream) + archive, err := tarfile.NewReaderFromStream(sys, inputStream) if err != nil { return nil, err } + src := tarfile.NewSource(archive, true) return &daemonImageSource{ ref: ref, Source: src, diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go index 017126ed38..9b13d16b48 100644 --- a/docker/internal/tarfile/src.go +++ b/docker/internal/tarfile/src.go @@ -21,7 +21,8 @@ import ( // Source is a partial implementation of types.ImageSource for reading from tarPath. type Source struct { - archive *Reader + archive *Reader + closeArchive bool // .Close() the archive when the source is closed. // The following data is only available after ensureCachedDataIsPresent() succeeds tarManifest *ManifestItem // nil if not available yet. 
configBytes []byte @@ -44,28 +45,13 @@ type layerInfo struct { // the source of an image. // To do for both the NewSourceFromFile and NewSourceFromStream functions -// NewSourceFromFile returns a tarfile.Source for the specified path. -func NewSourceFromFile(sys *types.SystemContext, path string) (*Source, error) { - archive, err := NewReaderFromFile(sys, path) - if err != nil { - return nil, err - } +// NewSource returns a tarfile.Source for the only image in the specified archive. +// The archive will be closed if closeArchive +func NewSource(archive *Reader, closeArchive bool) *Source { return &Source{ - archive: archive, - }, nil -} - -// NewSourceFromStream returns a tarfile.Source for the specified inputStream, -// which can be either compressed or uncompressed. The caller can close the -// inputStream immediately after NewSourceFromFile returns. -func NewSourceFromStream(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { - archive, err := NewReaderFromStream(sys, inputStream) - if err != nil { - return nil, err + archive: archive, + closeArchive: closeArchive, } - return &Source{ - archive: archive, - }, nil } // ensureCachedDataIsPresent loads data necessary for any of the public accessors. @@ -134,7 +120,10 @@ func (s *Source) loadTarManifest() ([]ManifestItem, error) { // Close removes resources associated with an initialized Source, if any. func (s *Source) Close() error { - return s.archive.Close() + if s.closeArchive { + return s.archive.Close() + } + return nil } // LoadTarManifest loads and decodes the manifest.json diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index a9c63c137a..4913f7dc6b 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -24,10 +24,11 @@ func NewSourceFromFile(path string) (*Source, error) { // NewSourceFromFileWithContext returns a tarfile.Source for the specified path. 
func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Source, error) { - src, err := internal.NewSourceFromFile(sys, path) + archive, err := internal.NewReaderFromFile(sys, path) if err != nil { return nil, err } + src := internal.NewSource(archive, true) return &Source{internal: src}, nil } @@ -44,10 +45,11 @@ func NewSourceFromStream(inputStream io.Reader) (*Source, error) { // which can be either compressed or uncompressed. The caller can close the // inputStream immediately after NewSourceFromFile returns. func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream io.Reader) (*Source, error) { - src, err := internal.NewSourceFromStream(sys, inputStream) + archive, err := internal.NewReaderFromStream(sys, inputStream) if err != nil { return nil, err } + src := internal.NewSource(archive, true) return &Source{internal: src}, nil } From ca4e06af444f3750690a1c5e036101ec29560564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 21:16:23 +0200 Subject: [PATCH 28/35] Read the tarfile manifest already when initializing tarfile.Reader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every caller will need that data; this way it can be shared across several consumers, and we don't need to synchronize access/creation of the parsed data structure. Should not change behavior, except that errors are now reported earlier. 
Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/reader.go | 46 +++++++++++++++++++++++++------ docker/internal/tarfile/src.go | 37 ++++++------------------- 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/docker/internal/tarfile/reader.go b/docker/internal/tarfile/reader.go index d916b51eea..52f13298ff 100644 --- a/docker/internal/tarfile/reader.go +++ b/docker/internal/tarfile/reader.go @@ -2,6 +2,7 @@ package tarfile import ( "archive/tar" + "encoding/json" "io" "io/ioutil" "os" @@ -17,7 +18,8 @@ import ( // Reader is a ((docker save)-formatted) tar archive that allows random access to any component. type Reader struct { path string - removeOnClose bool // Remove file on close if true + removeOnClose bool // Remove file on close if true + Manifest []ManifestItem // Guaranteed to exist after the archive is created. } // NewReaderFromFile returns a Reader for the specified path. @@ -37,9 +39,7 @@ func NewReaderFromFile(sys *types.SystemContext, path string) (*Reader, error) { } defer stream.Close() if !isCompressed { - return &Reader{ - path: path, - }, nil + return newReader(path, false) } return NewReaderFromStream(sys, stream) } @@ -81,10 +81,40 @@ func NewReaderFromStream(sys *types.SystemContext, inputStream io.Reader) (*Read } succeeded = true - return &Reader{ - path: tarCopyFile.Name(), - removeOnClose: true, - }, nil + return newReader(tarCopyFile.Name(), true) +} + +// newReader creates a Reader for the specified path and removeOnClose flag. +// The caller should call .Close() on the returned archive when done. +func newReader(path string, removeOnClose bool) (*Reader, error) { + // This is a valid enough archive, except Manifest is not yet filled. 
+ r := Reader{ + path: path, + removeOnClose: removeOnClose, + } + succeeded := false + defer func() { + if !succeeded { + r.Close() + } + }() + + // We initialize Manifest immediately when constructing the Reader instead + // of later on-demand because every caller will need the data, and because doing it now + // removes the need to synchronize the access/creation of the data if the archive is later + // used from multiple goroutines to access different images. + + // FIXME? Do we need to deal with the legacy format? + bytes, err := r.readTarComponent(manifestFileName, iolimits.MaxTarFileManifestSize) + if err != nil { + return nil, err + } + if err := json.Unmarshal(bytes, &r.Manifest); err != nil { + return nil, errors.Wrap(err, "Error decoding tar manifest.json") + } + + succeeded = true + return &r, nil } // Close removes resources associated with an initialized Reader, if any. diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go index 9b13d16b48..2b3735bc36 100644 --- a/docker/internal/tarfile/src.go +++ b/docker/internal/tarfile/src.go @@ -66,37 +66,32 @@ func (s *Source) ensureCachedDataIsPresent() error { // ensureCachedDataIsPresentPrivate is a private implementation detail of ensureCachedDataIsPresent. // Call ensureCachedDataIsPresent instead. func (s *Source) ensureCachedDataIsPresentPrivate() error { - // Read and parse manifest.json - tarManifest, err := s.loadTarManifest() - if err != nil { - return err - } - // Check to make sure length is 1 - if len(tarManifest) != 1 { - return errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(tarManifest)) + if len(s.archive.Manifest) != 1 { + return errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(s.archive.Manifest)) } + tarManifest := &s.archive.Manifest[0] // Read and parse config. 
- configBytes, err := s.archive.readTarComponent(tarManifest[0].Config, iolimits.MaxConfigBodySize) + configBytes, err := s.archive.readTarComponent(tarManifest.Config, iolimits.MaxConfigBodySize) if err != nil { return err } var parsedConfig manifest.Schema2Image // There's a lot of info there, but we only really care about layer DiffIDs. if err := json.Unmarshal(configBytes, &parsedConfig); err != nil { - return errors.Wrapf(err, "Error decoding tar config %s", tarManifest[0].Config) + return errors.Wrapf(err, "Error decoding tar config %s", tarManifest.Config) } if parsedConfig.RootFS == nil { - return errors.Errorf("Invalid image config (rootFS is not set): %s", tarManifest[0].Config) + return errors.Errorf("Invalid image config (rootFS is not set): %s", tarManifest.Config) } - knownLayers, err := s.prepareLayerData(&tarManifest[0], &parsedConfig) + knownLayers, err := s.prepareLayerData(tarManifest, &parsedConfig) if err != nil { return err } // Success; commit. - s.tarManifest = &tarManifest[0] + s.tarManifest = tarManifest s.configBytes = configBytes s.configDigest = digest.FromBytes(configBytes) s.orderedDiffIDList = parsedConfig.RootFS.DiffIDs @@ -104,20 +99,6 @@ func (s *Source) ensureCachedDataIsPresentPrivate() error { return nil } -// loadTarManifest loads and decodes the manifest.json. -func (s *Source) loadTarManifest() ([]ManifestItem, error) { - // FIXME? Do we need to deal with the legacy format? - bytes, err := s.archive.readTarComponent(manifestFileName, iolimits.MaxTarFileManifestSize) - if err != nil { - return nil, err - } - var items []ManifestItem - if err := json.Unmarshal(bytes, &items); err != nil { - return nil, errors.Wrap(err, "Error decoding tar manifest.json") - } - return items, nil -} - // Close removes resources associated with an initialized Source, if any. 
func (s *Source) Close() error { if s.closeArchive { @@ -128,7 +109,7 @@ func (s *Source) Close() error { // LoadTarManifest loads and decodes the manifest.json func (s *Source) LoadTarManifest() ([]ManifestItem, error) { - return s.loadTarManifest() + return s.archive.Manifest, nil } func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manifest.Schema2Image) (map[digest.Digest]*layerInfo, error) { From dbd47615981841c7f0f0a2d760696d27cdaef7fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 18:29:16 +0200 Subject: [PATCH 29/35] Turn tarfile.Source.LoadTarManifest into a TarManifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can drop the unused error return value now. Signed-off-by: Miloslav Trmač --- docker/internal/tarfile/src.go | 6 +++--- docker/tarfile/src.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go index 2b3735bc36..bcb74faa8f 100644 --- a/docker/internal/tarfile/src.go +++ b/docker/internal/tarfile/src.go @@ -107,9 +107,9 @@ func (s *Source) Close() error { return nil } -// LoadTarManifest loads and decodes the manifest.json -func (s *Source) LoadTarManifest() ([]ManifestItem, error) { - return s.archive.Manifest, nil +// TarManifest returns contents of manifest.json +func (s *Source) TarManifest() []ManifestItem { + return s.archive.Manifest } func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manifest.Schema2Image) (map[digest.Digest]*layerInfo, error) { diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index 4913f7dc6b..cbbf462a3f 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -60,7 +60,7 @@ func (s *Source) Close() error { // LoadTarManifest loads and decodes the manifest.json func (s *Source) LoadTarManifest() ([]ManifestItem, error) { - return s.internal.LoadTarManifest() + return 
s.internal.TarManifest(), nil } // GetManifest returns the image's manifest along with its MIME type (which may be empty when it can't be determined but the manifest is available). From 702986044b23c1e01718d18ec3bc43091d1d2d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 21:03:11 +0200 Subject: [PATCH 30/35] Allow choosing an image from tarfile.Reader by reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already accept the syntax for docker-archive: references, now implement the lookup instead of warning and ignoring the value. Signed-off-by: Miloslav Trmač --- docker/archive/dest.go | 2 +- docker/archive/src.go | 6 +---- docker/archive/transport.go | 31 +++++++++++---------- docker/archive/transport_test.go | 22 +++++++-------- docker/daemon/daemon_src.go | 2 +- docker/internal/tarfile/src.go | 46 +++++++++++++++++++++++--------- docker/tarfile/src.go | 4 +-- docs/containers-transports.5.md | 4 ++- 8 files changed, 68 insertions(+), 49 deletions(-) diff --git a/docker/archive/dest.go b/docker/archive/dest.go index 1cf197429b..272041a30b 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -36,7 +36,7 @@ func newImageDestination(sys *types.SystemContext, ref archiveReference) (types. 
return nil, errors.New("docker-archive doesn't support modifying existing images") } - tarDest := tarfile.NewDestinationWithContext(sys, fh, ref.destinationRef) + tarDest := tarfile.NewDestinationWithContext(sys, fh, ref.ref) if sys != nil && sys.DockerArchiveAdditionalTags != nil { tarDest.AddRepoTags(sys.DockerArchiveAdditionalTags) } diff --git a/docker/archive/src.go b/docker/archive/src.go index cfeb6bee18..bbb4af92bf 100644 --- a/docker/archive/src.go +++ b/docker/archive/src.go @@ -5,7 +5,6 @@ import ( "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/types" - "github.com/sirupsen/logrus" ) type archiveImageSource struct { @@ -16,14 +15,11 @@ type archiveImageSource struct { // newImageSource returns a types.ImageSource for the specified image reference. // The caller must call .Close() on the returned ImageSource. func newImageSource(ctx context.Context, sys *types.SystemContext, ref archiveReference) (types.ImageSource, error) { - if ref.destinationRef != nil { - logrus.Warnf("docker-archive: references are not supported for sources (ignoring)") - } archive, err := tarfile.NewReaderFromFile(sys, ref.path) if err != nil { return nil, err } - src := tarfile.NewSource(archive, true) + src := tarfile.NewSource(archive, true, ref.ref) return &archiveImageSource{ Source: src, ref: ref, diff --git a/docker/archive/transport.go b/docker/archive/transport.go index 4c35dc290e..558df9d0f7 100644 --- a/docker/archive/transport.go +++ b/docker/archive/transport.go @@ -42,9 +42,8 @@ func (t archiveTransport) ValidatePolicyConfigurationScope(scope string) error { // archiveReference is an ImageReference for Docker images. type archiveReference struct { path string - // only used for destinations, - // archiveReference.destinationRef is optional and can be nil for destinations as well. - destinationRef reference.NamedTagged + // May be nil to read the only image in an archive, or to create an untagged image. 
+ ref reference.NamedTagged } // ParseReference converts a string, which should not start with the ImageTransport.Name prefix, into an Docker ImageReference. @@ -55,10 +54,10 @@ func ParseReference(refString string) (types.ImageReference, error) { parts := strings.SplitN(refString, ":", 2) path := parts[0] - var destinationRef reference.NamedTagged + var nt reference.NamedTagged - // A :tag was specified, which is only necessary for destinations. if len(parts) == 2 { + // A :tag was specified. ref, err := reference.ParseNormalizedNamed(parts[1]) if err != nil { return nil, errors.Wrapf(err, "docker-archive parsing reference") @@ -68,23 +67,23 @@ func ParseReference(refString string) (types.ImageReference, error) { if !isTagged { // If ref contains a digest, TagNameOnly does not change it return nil, errors.Errorf("reference does not include a tag: %s", ref.String()) } - destinationRef = refTagged + nt = refTagged } - return NewReference(path, destinationRef) + return NewReference(path, nt) } -// NewReference rethrns a Docker archive reference for a path and an optional destination reference. -func NewReference(path string, destinationRef reference.NamedTagged) (types.ImageReference, error) { +// NewReference returns a Docker archive reference for a path and an optional reference. 
+func NewReference(path string, ref reference.NamedTagged) (types.ImageReference, error) { if strings.Contains(path, ":") { return nil, errors.Errorf("Invalid docker-archive: reference: colon in path %q is not supported", path) } - if _, isDigest := destinationRef.(reference.Canonical); isDigest { - return nil, errors.Errorf("docker-archive doesn't support digest references: %s", destinationRef.String()) + if _, isDigest := ref.(reference.Canonical); isDigest { + return nil, errors.Errorf("docker-archive doesn't support digest references: %s", ref.String()) } return archiveReference{ - path: path, - destinationRef: destinationRef, + path: path, + ref: ref, }, nil } @@ -98,17 +97,17 @@ func (ref archiveReference) Transport() types.ImageTransport { // e.g. default attribute values omitted by the user may be filled in in the return value, or vice versa. // WARNING: Do not use the return value in the UI to describe an image, it does not contain the Transport().Name() prefix. func (ref archiveReference) StringWithinTransport() string { - if ref.destinationRef == nil { + if ref.ref == nil { return ref.path } - return fmt.Sprintf("%s:%s", ref.path, ref.destinationRef.String()) + return fmt.Sprintf("%s:%s", ref.path, ref.ref.String()) } // DockerReference returns a Docker reference associated with this reference // (fully explicit, i.e. !reference.IsNameOnly, but reflecting user intent, // not e.g. after redirect or alias processing), or nil if unknown/not applicable. func (ref archiveReference) DockerReference() reference.Named { - return ref.destinationRef + return ref.ref } // PolicyConfigurationIdentity returns a string representation of the reference, suitable for policy lookup. 
diff --git a/docker/archive/transport_test.go b/docker/archive/transport_test.go index 632ce4f336..7b4747d64e 100644 --- a/docker/archive/transport_test.go +++ b/docker/archive/transport_test.go @@ -67,10 +67,10 @@ func testParseReference(t *testing.T, fn func(string) (types.ImageReference, err require.True(t, ok, c.input) assert.Equal(t, c.expectedPath, archiveRef.path, c.input) if c.expectedRef == "" { - assert.Nil(t, archiveRef.destinationRef, c.input) + assert.Nil(t, archiveRef.ref, c.input) } else { - require.NotNil(t, archiveRef.destinationRef, c.input) - assert.Equal(t, c.expectedRef, archiveRef.destinationRef.String(), c.input) + require.NotNil(t, archiveRef.ref, c.input) + assert.Equal(t, c.expectedRef, archiveRef.ref.String(), c.input) } } } @@ -104,10 +104,10 @@ func TestNewReference(t *testing.T) { require.True(t, ok, c.ref) assert.Equal(t, path, archiveRef.path) if c.ref == "" { - assert.Nil(t, archiveRef.destinationRef, c.ref) + assert.Nil(t, archiveRef.ref, c.ref) } else { - require.NotNil(t, archiveRef.destinationRef, c.ref) - assert.Equal(t, ntRef.String(), archiveRef.destinationRef.String(), c.ref) + require.NotNil(t, archiveRef.ref, c.ref) + assert.Equal(t, ntRef.String(), archiveRef.ref.String(), c.ref) } } @@ -176,21 +176,21 @@ func TestReferencePolicyConfigurationNamespaces(t *testing.T) { } func TestReferenceNewImage(t *testing.T) { - for _, suffix := range []string{"", ":thisisignoredbutaccepted"} { + for _, suffix := range []string{"", ":emptyimage:latest"} { ref, err := ParseReference(tarFixture + suffix) require.NoError(t, err, suffix) img, err := ref.NewImage(context.Background(), nil) - assert.NoError(t, err, suffix) + require.NoError(t, err, suffix) defer img.Close() } } func TestReferenceNewImageSource(t *testing.T) { - for _, suffix := range []string{"", ":thisisignoredbutaccepted"} { + for _, suffix := range []string{"", ":emptyimage:latest"} { ref, err := ParseReference(tarFixture + suffix) require.NoError(t, err, suffix) src, err 
:= ref.NewImageSource(context.Background(), nil) - assert.NoError(t, err, suffix) + require.NoError(t, err, suffix) defer src.Close() } } @@ -218,7 +218,7 @@ func TestReferenceDeleteImage(t *testing.T) { require.NoError(t, err) defer os.RemoveAll(tmpDir) - for i, suffix := range []string{"", ":thisisignoredbutaccepted"} { + for i, suffix := range []string{"", ":some-reference"} { testFile := filepath.Join(tmpDir, fmt.Sprintf("file%d.tar", i)) err := ioutil.WriteFile(testFile, []byte("nonempty"), 0644) require.NoError(t, err, suffix) diff --git a/docker/daemon/daemon_src.go b/docker/daemon/daemon_src.go index 8cca3a893d..3572d53bb8 100644 --- a/docker/daemon/daemon_src.go +++ b/docker/daemon/daemon_src.go @@ -39,7 +39,7 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref daemonRef if err != nil { return nil, err } - src := tarfile.NewSource(archive, true) + src := tarfile.NewSource(archive, true, nil) return &daemonImageSource{ ref: ref, Source: src, diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go index bcb74faa8f..a5ddf6d0ae 100644 --- a/docker/internal/tarfile/src.go +++ b/docker/internal/tarfile/src.go @@ -11,6 +11,7 @@ import ( "path" "sync" + "github.com/containers/image/v5/docker/reference" "github.com/containers/image/v5/internal/iolimits" "github.com/containers/image/v5/manifest" "github.com/containers/image/v5/pkg/compression" @@ -22,7 +23,8 @@ import ( // Source is a partial implementation of types.ImageSource for reading from tarPath. type Source struct { archive *Reader - closeArchive bool // .Close() the archive when the source is closed. + closeArchive bool // .Close() the archive when the source is closed. + ref reference.NamedTagged // May be nil to indicate the only image in the archive // The following data is only available after ensureCachedDataIsPresent() succeeds tarManifest *ManifestItem // nil if not available yet. 
configBytes []byte @@ -40,17 +42,14 @@ type layerInfo struct { size int64 } -// TODO: We could add support for multiple images in a single archive, so -// that people could use docker-archive:opensuse.tar:opensuse:leap as -// the source of an image. -// To do for both the NewSourceFromFile and NewSourceFromStream functions - -// NewSource returns a tarfile.Source for the only image in the specified archive. +// NewSource returns a tarfile.Source for an image in the specified archive matching ref +// (or the only image if ref is nil). // The archive will be closed if closeArchive -func NewSource(archive *Reader, closeArchive bool) *Source { +func NewSource(archive *Reader, closeArchive bool, ref reference.NamedTagged) *Source { return &Source{ archive: archive, closeArchive: closeArchive, + ref: ref, } } @@ -66,11 +65,10 @@ func (s *Source) ensureCachedDataIsPresent() error { // ensureCachedDataIsPresentPrivate is a private implementation detail of ensureCachedDataIsPresent. // Call ensureCachedDataIsPresent instead. func (s *Source) ensureCachedDataIsPresentPrivate() error { - // Check to make sure length is 1 - if len(s.archive.Manifest) != 1 { - return errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(s.archive.Manifest)) + tarManifest, err := s.chooseManifest() + if err != nil { + return err } - tarManifest := &s.archive.Manifest[0] // Read and parse config. configBytes, err := s.archive.readTarComponent(tarManifest.Config, iolimits.MaxConfigBodySize) @@ -99,6 +97,30 @@ func (s *Source) ensureCachedDataIsPresentPrivate() error { return nil } +// chooseManifest selects a manifest item s.ref. 
+func (s *Source) chooseManifest() (*ManifestItem, error) { + if s.ref == nil { + if len(s.archive.Manifest) != 1 { + return nil, errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(s.archive.Manifest)) + } + return &s.archive.Manifest[0], nil + } + + refString := s.ref.String() + for i := range s.archive.Manifest { + for _, tag := range s.archive.Manifest[i].RepoTags { + parsedTag, err := reference.ParseNormalizedNamed(tag) + if err != nil { + return nil, errors.Wrapf(err, "Invalid tag %#v in manifest.json item %d", tag, i+1) + } + if parsedTag.String() == refString { + return &s.archive.Manifest[i], nil + } + } + } + return nil, errors.Errorf("Tag %#v not found", refString) +} + // Close removes resources associated with an initialized Source, if any. func (s *Source) Close() error { if s.closeArchive { diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index cbbf462a3f..4c30233f1f 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -28,7 +28,7 @@ func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Sourc if err != nil { return nil, err } - src := internal.NewSource(archive, true) + src := internal.NewSource(archive, true, nil) return &Source{internal: src}, nil } @@ -49,7 +49,7 @@ func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream if err != nil { return nil, err } - src := internal.NewSource(archive, true) + src := internal.NewSource(archive, true, nil) return &Source{internal: src}, nil } diff --git a/docs/containers-transports.5.md b/docs/containers-transports.5.md index 6176324147..ef5094c3b7 100644 --- a/docs/containers-transports.5.md +++ b/docs/containers-transports.5.md @@ -44,7 +44,9 @@ If the first component of name is not recognized as a `hostname[:port]`, `name` ### **docker-archive:**_path[:docker-reference]_ An image is stored in the docker-save(1) formatted file. 
-_docker-reference_ is only used when creating such a file, and it must not contain a digest. +_docker-reference_ must not contain a digest. +If _docker-reference_ is missing when reading an archive, the archive must contain exactly one image. + It is further possible to copy data to stdin by specifying `docker-archive:/dev/stdin` but note that the used file must be seekable. ### **docker-daemon:**_docker-reference|algo:digest_ From 756bef7602555edc3493df2bd8194f256e7dc90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 22:51:00 +0200 Subject: [PATCH 31/35] Introduce docker-archive:path:@index syntax for reading untagged images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for path:@index (e.g. path:@0, path:@1 ...) reference syntax to docker-archive. This will allow reading even untagged images from multi-image archives. Signed-off-by: Miloslav Trmač --- docker/archive/dest.go | 3 + docker/archive/src.go | 2 +- docker/archive/transport.go | 67 ++++++++++++++++----- docker/archive/transport_test.go | 99 +++++++++++++++++++++++++------- docker/daemon/daemon_src.go | 2 +- docker/internal/tarfile/src.go | 54 +++++++++++------ docker/tarfile/src.go | 4 +- docs/containers-transports.5.md | 6 +- 8 files changed, 175 insertions(+), 62 deletions(-) diff --git a/docker/archive/dest.go b/docker/archive/dest.go index 272041a30b..662cc9b330 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -17,6 +17,9 @@ type archiveImageDestination struct { } func newImageDestination(sys *types.SystemContext, ref archiveReference) (types.ImageDestination, error) { + if ref.sourceIndex != -1 { + return nil, errors.Errorf("Destination reference must not contain a manifest index @%d", ref.sourceIndex) + } // ref.path can be either a pipe or a regular file // in the case of a pipe, we require that we can open it for write // in the case of a regular file, we don't want to overwrite 
any pre-existing file diff --git a/docker/archive/src.go b/docker/archive/src.go index bbb4af92bf..e850b4b4e4 100644 --- a/docker/archive/src.go +++ b/docker/archive/src.go @@ -19,7 +19,7 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref archiveRe if err != nil { return nil, err } - src := tarfile.NewSource(archive, true, ref.ref) + src := tarfile.NewSource(archive, true, ref.ref, ref.sourceIndex) return &archiveImageSource{ Source: src, ref: ref, diff --git a/docker/archive/transport.go b/docker/archive/transport.go index 558df9d0f7..de30b22f77 100644 --- a/docker/archive/transport.go +++ b/docker/archive/transport.go @@ -3,6 +3,7 @@ package archive import ( "context" "fmt" + "strconv" "strings" "github.com/containers/image/v5/docker/reference" @@ -44,6 +45,9 @@ type archiveReference struct { path string // May be nil to read the only image in an archive, or to create an untagged image. ref reference.NamedTagged + // If not -1, a zero-based index of the image in the manifest. Valid only for sources. + // Must not be set if ref is set. + sourceIndex int } // ParseReference converts a string, which should not start with the ImageTransport.Name prefix, into an Docker ImageReference. @@ -55,35 +59,64 @@ func ParseReference(refString string) (types.ImageReference, error) { parts := strings.SplitN(refString, ":", 2) path := parts[0] var nt reference.NamedTagged + sourceIndex := -1 if len(parts) == 2 { - // A :tag was specified. - ref, err := reference.ParseNormalizedNamed(parts[1]) - if err != nil { - return nil, errors.Wrapf(err, "docker-archive parsing reference") + // A :tag or :@index was specified. 
+ if len(parts[1]) > 0 && parts[1][0] == '@' { + i, err := strconv.Atoi(parts[1][1:]) + if err != nil { + return nil, errors.Wrapf(err, "Invalid source index %s", parts[1]) + } + if i < 0 { + return nil, errors.Errorf("Invalid source index @%d: must not be negative", i) + } + sourceIndex = i + } else { + ref, err := reference.ParseNormalizedNamed(parts[1]) + if err != nil { + return nil, errors.Wrapf(err, "docker-archive parsing reference") + } + ref = reference.TagNameOnly(ref) + refTagged, isTagged := ref.(reference.NamedTagged) + if !isTagged { // If ref contains a digest, TagNameOnly does not change it + return nil, errors.Errorf("reference does not include a tag: %s", ref.String()) + } + nt = refTagged } - ref = reference.TagNameOnly(ref) - refTagged, isTagged := ref.(reference.NamedTagged) - if !isTagged { // If ref contains a digest, TagNameOnly does not change it - return nil, errors.Errorf("reference does not include a tag: %s", ref.String()) - } - nt = refTagged } - return NewReference(path, nt) + return newReference(path, nt, sourceIndex) } // NewReference returns a Docker archive reference for a path and an optional reference. func NewReference(path string, ref reference.NamedTagged) (types.ImageReference, error) { + return newReference(path, ref, -1) +} + +// NewIndexReference returns a Docker archive reference for a path and a zero-based source manifest index. +func NewIndexReference(path string, sourceIndex int) (types.ImageReference, error) { + return newReference(path, nil, sourceIndex) +} + +// newReference returns a docker archive reference for a path and an optional reference or sourceIndex. 
+func newReference(path string, ref reference.NamedTagged, sourceIndex int) (types.ImageReference, error) { if strings.Contains(path, ":") { return nil, errors.Errorf("Invalid docker-archive: reference: colon in path %q is not supported", path) } + if ref != nil && sourceIndex != -1 { + return nil, errors.Errorf("Invalid docker-archive: reference: cannot use both a tag and a source index") + } if _, isDigest := ref.(reference.Canonical); isDigest { return nil, errors.Errorf("docker-archive doesn't support digest references: %s", ref.String()) } + if sourceIndex != -1 && sourceIndex < 0 { + return nil, errors.Errorf("Invalid docker-archive: reference: index @%d must not be negative", sourceIndex) + } return archiveReference{ - path: path, - ref: ref, + path: path, + ref: ref, + sourceIndex: sourceIndex, }, nil } @@ -97,10 +130,14 @@ func (ref archiveReference) Transport() types.ImageTransport { // e.g. default attribute values omitted by the user may be filled in in the return value, or vice versa. // WARNING: Do not use the return value in the UI to describe an image, it does not contain the Transport().Name() prefix. func (ref archiveReference) StringWithinTransport() string { - if ref.ref == nil { + switch { + case ref.ref != nil: + return fmt.Sprintf("%s:%s", ref.path, ref.ref.String()) + case ref.sourceIndex != -1: + return fmt.Sprintf("%s:@%d", ref.path, ref.sourceIndex) + default: return ref.path } - return fmt.Sprintf("%s:%s", ref.path, ref.ref.String()) } // DockerReference returns a Docker reference associated with this reference diff --git a/docker/archive/transport_test.go b/docker/archive/transport_test.go index 7b4747d64e..008a026618 100644 --- a/docker/archive/transport_test.go +++ b/docker/archive/transport_test.go @@ -47,16 +47,26 @@ func TestParseReference(t *testing.T) { // testParseReference is a test shared for Transport.ParseReference and ParseReference. 
func testParseReference(t *testing.T, fn func(string) (types.ImageReference, error)) { - for _, c := range []struct{ input, expectedPath, expectedRef string }{ - {"", "", ""}, // Empty input is explicitly rejected - {"/path", "/path", ""}, - {"/path:busybox:notlatest", "/path", "docker.io/library/busybox:notlatest"}, // Explicit tag - {"/path:busybox" + sha256digest, "", ""}, // Digest references are forbidden - {"/path:busybox", "/path", "docker.io/library/busybox:latest"}, // Default tag + for _, c := range []struct { + input, expectedPath, expectedRef string + expectedSourceIndex int + }{ + {"", "", "", -1}, // Empty input is explicitly rejected + {"/path", "/path", "", -1}, + {"/path:busybox:notlatest", "/path", "docker.io/library/busybox:notlatest", -1}, // Explicit tag + {"/path:busybox" + sha256digest, "", "", -1}, // Digest references are forbidden + {"/path:busybox", "/path", "docker.io/library/busybox:latest", -1}, // Default tag // A github.com/distribution/reference value can have a tag and a digest at the same time! 
- {"/path:busybox:latest" + sha256digest, "", ""}, // Both tag and digest is rejected - {"/path:docker.io/library/busybox:latest", "/path", "docker.io/library/busybox:latest"}, // All implied values explicitly specified - {"/path:UPPERCASEISINVALID", "", ""}, // Invalid input + {"/path:busybox:latest" + sha256digest, "", "", -1}, // Both tag and digest is rejected + {"/path:docker.io/library/busybox:latest", "/path", "docker.io/library/busybox:latest", -1}, // All implied reference parts explicitly specified + {"/path:UPPERCASEISINVALID", "", "", -1}, // Invalid reference format + {"/path:@", "", "", -1}, // Missing source index + {"/path:@0", "/path", "", 0}, // Valid source index + {"/path:@999999", "/path", "", 999999}, // Valid source index + {"/path:@-2", "", "", -1}, // Negative source index + {"/path:@-1", "", "", -1}, // Negative source index, using the placeholder value + {"/path:busybox@0", "", "", -1}, // References and source indices can’t be combined. + {"/path:@0:busybox", "", "", -1}, // References and source indices can’t be combined. 
} { ref, err := fn(c.input) if c.expectedPath == "" { @@ -72,10 +82,20 @@ func testParseReference(t *testing.T, fn func(string) (types.ImageReference, err require.NotNil(t, archiveRef.ref, c.input) assert.Equal(t, c.expectedRef, archiveRef.ref.String(), c.input) } + assert.Equal(t, c.expectedSourceIndex, archiveRef.sourceIndex, c.input) } } } +// namedTaggedRef returns a reference.NamedTagged for input +func namedTaggedRef(t *testing.T, input string) reference.NamedTagged { + named, err := reference.ParseNormalizedNamed(input) + require.NoError(t, err, input) + nt, ok := named.(reference.NamedTagged) + require.True(t, ok, input) + return nt +} + func TestNewReference(t *testing.T) { for _, path := range []string{"relative", "/absolute"} { for _, c := range []struct { @@ -88,11 +108,7 @@ func TestNewReference(t *testing.T) { } { var ntRef reference.NamedTagged = nil if c.ref != "" { - namedRef, err := reference.ParseNormalizedNamed(c.ref) - require.NoError(t, err, c.ref) - nt, ok := namedRef.(reference.NamedTagged) - require.True(t, ok, c.ref) - ntRef = nt + ntRef = namedTaggedRef(t, c.ref) } res, err := NewReference(path, ntRef) @@ -109,8 +125,41 @@ func TestNewReference(t *testing.T) { require.NotNil(t, archiveRef.ref, c.ref) assert.Equal(t, ntRef.String(), archiveRef.ref.String(), c.ref) } + assert.Equal(t, -1, archiveRef.sourceIndex, c.ref) } + } + } + _, err := NewReference("with:colon", nil) + assert.Error(t, err) + + // Complete coverage testing of the private newReference here as well + ntRef := namedTaggedRef(t, "busybox:latest") + _, err = newReference("path", ntRef, 0) + assert.Error(t, err) +} +func TestNewIndexReference(t *testing.T) { + for _, path := range []string{"relative", "/absolute"} { + for _, c := range []struct { + index int + ok bool + }{ + {0, true}, + {9999990, true}, + {-1, true}, + {-2, false}, + } { + res, err := NewIndexReference(path, c.index) + if !c.ok { + assert.Error(t, err, c.index) + } else { + require.NoError(t, err, c.index) + 
archiveRef, ok := res.(archiveReference) + require.True(t, ok, c.index) + assert.Equal(t, path, archiveRef.path) + assert.Nil(t, archiveRef.ref, c.index) + assert.Equal(t, c.index, archiveRef.sourceIndex) + } } } _, err := NewReference("with:colon", nil) @@ -118,11 +167,17 @@ func TestNewReference(t *testing.T) { } // A common list of reference formats to test for the various ImageReference methods. -var validReferenceTestCases = []struct{ input, dockerRef, stringWithinTransport string }{ - {"/pathonly", "", "/pathonly"}, - {"/path:busybox:notlatest", "docker.io/library/busybox:notlatest", "/path:docker.io/library/busybox:notlatest"}, // Explicit tag - {"/path:docker.io/library/busybox:latest", "docker.io/library/busybox:latest", "/path:docker.io/library/busybox:latest"}, // All implied values explicitly specified - {"/path:example.com/ns/foo:bar", "example.com/ns/foo:bar", "/path:example.com/ns/foo:bar"}, // All values explicitly specified +var validReferenceTestCases = []struct { + input, dockerRef string + sourceIndex int + stringWithinTransport string +}{ + {"/pathonly", "", -1, "/pathonly"}, + {"/path:busybox:notlatest", "docker.io/library/busybox:notlatest", -1, "/path:docker.io/library/busybox:notlatest"}, // Explicit tag + {"/path:docker.io/library/busybox:latest", "docker.io/library/busybox:latest", -1, "/path:docker.io/library/busybox:latest"}, // All implied reference part explicitly specified + {"/path:example.com/ns/foo:bar", "example.com/ns/foo:bar", -1, "/path:example.com/ns/foo:bar"}, // All values explicitly specified + {"/path:@0", "", 0, "/path:@0"}, + {"/path:@999999", "", 999999, "/path:@999999"}, } func TestReferenceTransport(t *testing.T) { @@ -176,7 +231,7 @@ func TestReferencePolicyConfigurationNamespaces(t *testing.T) { } func TestReferenceNewImage(t *testing.T) { - for _, suffix := range []string{"", ":emptyimage:latest"} { + for _, suffix := range []string{"", ":emptyimage:latest", ":@0"} { ref, err := ParseReference(tarFixture + suffix) 
require.NoError(t, err, suffix) img, err := ref.NewImage(context.Background(), nil) @@ -186,7 +241,7 @@ func TestReferenceNewImage(t *testing.T) { } func TestReferenceNewImageSource(t *testing.T) { - for _, suffix := range []string{"", ":emptyimage:latest"} { + for _, suffix := range []string{"", ":emptyimage:latest", ":@0"} { ref, err := ParseReference(tarFixture + suffix) require.NoError(t, err, suffix) src, err := ref.NewImageSource(context.Background(), nil) @@ -218,7 +273,7 @@ func TestReferenceDeleteImage(t *testing.T) { require.NoError(t, err) defer os.RemoveAll(tmpDir) - for i, suffix := range []string{"", ":some-reference"} { + for i, suffix := range []string{"", ":some-reference", ":@0"} { testFile := filepath.Join(tmpDir, fmt.Sprintf("file%d.tar", i)) err := ioutil.WriteFile(testFile, []byte("nonempty"), 0644) require.NoError(t, err, suffix) diff --git a/docker/daemon/daemon_src.go b/docker/daemon/daemon_src.go index 3572d53bb8..74a6788178 100644 --- a/docker/daemon/daemon_src.go +++ b/docker/daemon/daemon_src.go @@ -39,7 +39,7 @@ func newImageSource(ctx context.Context, sys *types.SystemContext, ref daemonRef if err != nil { return nil, err } - src := tarfile.NewSource(archive, true, nil) + src := tarfile.NewSource(archive, true, nil, -1) return &daemonImageSource{ ref: ref, Source: src, diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go index a5ddf6d0ae..7c479cccb3 100644 --- a/docker/internal/tarfile/src.go +++ b/docker/internal/tarfile/src.go @@ -23,8 +23,10 @@ import ( // Source is a partial implementation of types.ImageSource for reading from tarPath. type Source struct { archive *Reader - closeArchive bool // .Close() the archive when the source is closed. - ref reference.NamedTagged // May be nil to indicate the only image in the archive + closeArchive bool // .Close() the archive when the source is closed. + // If ref is nil and sourceIndex is -1, indicates the only image in the archive. 
+ ref reference.NamedTagged // May be nil + sourceIndex int // May be -1 // The following data is only available after ensureCachedDataIsPresent() succeeds tarManifest *ManifestItem // nil if not available yet. configBytes []byte @@ -43,13 +45,14 @@ type layerInfo struct { } // NewSource returns a tarfile.Source for an image in the specified archive matching ref -// (or the only image if ref is nil). +// and sourceIndex (or the only image if they are (nil, -1)). // The archive will be closed if closeArchive -func NewSource(archive *Reader, closeArchive bool, ref reference.NamedTagged) *Source { +func NewSource(archive *Reader, closeArchive bool, ref reference.NamedTagged, sourceIndex int) *Source { return &Source{ archive: archive, closeArchive: closeArchive, ref: ref, + sourceIndex: sourceIndex, } } @@ -99,26 +102,39 @@ func (s *Source) ensureCachedDataIsPresentPrivate() error { // chooseManifest selects a manifest item s.ref. func (s *Source) chooseManifest() (*ManifestItem, error) { - if s.ref == nil { + switch { + case s.ref != nil && s.sourceIndex != -1: + return nil, errors.Errorf("Internal error: Cannot have both ref %s and source index @%d", + s.ref.String(), s.sourceIndex) + + case s.ref != nil: + refString := s.ref.String() + for i := range s.archive.Manifest { + for _, tag := range s.archive.Manifest[i].RepoTags { + parsedTag, err := reference.ParseNormalizedNamed(tag) + if err != nil { + return nil, errors.Wrapf(err, "Invalid tag %#v in manifest.json item @%d", tag, i) + } + if parsedTag.String() == refString { + return &s.archive.Manifest[i], nil + } + } + } + return nil, errors.Errorf("Tag %#v not found", refString) + + case s.sourceIndex != -1: + if s.sourceIndex >= len(s.archive.Manifest) { + return nil, errors.Errorf("Invalid source index @%d, only %d manifest items available", + s.sourceIndex, len(s.archive.Manifest)) + } + return &s.archive.Manifest[s.sourceIndex], nil + + default: if len(s.archive.Manifest) != 1 { return nil, 
errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(s.archive.Manifest)) } return &s.archive.Manifest[0], nil } - - refString := s.ref.String() - for i := range s.archive.Manifest { - for _, tag := range s.archive.Manifest[i].RepoTags { - parsedTag, err := reference.ParseNormalizedNamed(tag) - if err != nil { - return nil, errors.Wrapf(err, "Invalid tag %#v in manifest.json item %d", tag, i+1) - } - if parsedTag.String() == refString { - return &s.archive.Manifest[i], nil - } - } - } - return nil, errors.Errorf("Tag %#v not found", refString) } // Close removes resources associated with an initialized Source, if any. diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index 4c30233f1f..e14e4cabb4 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -28,7 +28,7 @@ func NewSourceFromFileWithContext(sys *types.SystemContext, path string) (*Sourc if err != nil { return nil, err } - src := internal.NewSource(archive, true, nil) + src := internal.NewSource(archive, true, nil, -1) return &Source{internal: src}, nil } @@ -49,7 +49,7 @@ func NewSourceFromStreamWithSystemContext(sys *types.SystemContext, inputStream if err != nil { return nil, err } - src := internal.NewSource(archive, true, nil) + src := internal.NewSource(archive, true, nil, -1) return &Source{internal: src}, nil } diff --git a/docs/containers-transports.5.md b/docs/containers-transports.5.md index ef5094c3b7..c87283fb84 100644 --- a/docs/containers-transports.5.md +++ b/docs/containers-transports.5.md @@ -41,11 +41,13 @@ If `name` does not contain a slash, it is treated as `docker.io/library/name`. Otherwise, the component before the first slash is checked if it is recognized as a `hostname[:port]` (i.e., it contains either a . or a :, or the component is exactly localhost). If the first component of name is not recognized as a `hostname[:port]`, `name` is treated as `docker.io/name`. 
-### **docker-archive:**_path[:docker-reference]_ +### **docker-archive:**_path[:{docker-reference|@source-index}]_ An image is stored in the docker-save(1) formatted file. _docker-reference_ must not contain a digest. -If _docker-reference_ is missing when reading an archive, the archive must contain exactly one image. +Alternatively, for reading archives, @_source-index_ is a zero-based index in the archive manifest +(to access untagged images). +If neither _docker-reference_ nor @_source-index_ is specified when reading an archive, the archive must contain exactly one image. It is further possible to copy data to stdin by specifying `docker-archive:/dev/stdin` but note that the used file must be seekable. From 4176316bcdc6a910f2a5015c0244cac6e8d08960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 21:56:03 +0200 Subject: [PATCH 32/35] Introduce docker/archive.Reader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit with only two operations: Close(), and List() which returns a set of ImageReference objects that allow accessing the individual images. For now, use of every reference triggers creation of a new tarfile.Reader; that will be fixed momentarily. 
Signed-off-by: Miloslav Trmač --- docker/archive/reader.go | 68 +++++++++++++++++++++++++++++++ docker/internal/tarfile/reader.go | 16 +++++++- 2 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 docker/archive/reader.go diff --git a/docker/archive/reader.go b/docker/archive/reader.go new file mode 100644 index 0000000000..82ebd9ffa1 --- /dev/null +++ b/docker/archive/reader.go @@ -0,0 +1,68 @@ +package archive + +import ( + "github.com/containers/image/v5/docker/internal/tarfile" + "github.com/containers/image/v5/docker/reference" + "github.com/containers/image/v5/types" + "github.com/pkg/errors" +) + +// Reader manages a single Docker archive, allows listing its contents and accessing +// individual images with less overhead than creating image references individually +// (because the archive is, if necessary, copied or decompressed only once). +type Reader struct { + path string // The original, user-specified path; not the maintained temporary file, if any + archive *tarfile.Reader +} + +// NewReader returns a Reader for path. +// The caller should call .Close() on the returned object. +func NewReader(sys *types.SystemContext, path string) (*Reader, error) { + archive, err := tarfile.NewReaderFromFile(sys, path) + if err != nil { + return nil, err + } + return &Reader{ + path: path, + archive: archive, + }, nil +} + +// Close deletes temporary files associated with the Reader, if any. +func (r *Reader) Close() error { + return r.archive.Close() +} + +// List returns a set of references for images in the Reader, +// grouped by the image the references point to. +// The references are valid only until the Reader is closed. 
+func (r *Reader) List() ([][]types.ImageReference, error) { + res := [][]types.ImageReference{} + for imageIndex, image := range r.archive.Manifest { + refs := []types.ImageReference{} + for _, tag := range image.RepoTags { + parsedTag, err := reference.ParseNormalizedNamed(tag) + if err != nil { + return nil, errors.Wrapf(err, "Invalid tag %#v in manifest item @%d", tag, imageIndex) + } + nt, ok := parsedTag.(reference.NamedTagged) + if !ok { + return nil, errors.Errorf("Invalid tag %s (%s): does not contain a tag", tag, parsedTag.String()) + } + ref, err := NewReference(r.path, nt) + if err != nil { + return nil, errors.Wrapf(err, "Error creating a reference for tag %#v in manifest item @%d", tag, imageIndex) + } + refs = append(refs, ref) + } + if len(refs) == 0 { + ref, err := NewIndexReference(r.path, imageIndex) + if err != nil { + return nil, errors.Wrapf(err, "Error creating a reference for manifest item @%d", imageIndex) + } + refs = append(refs, ref) + } + res = append(res, refs) + } + return res, nil +} diff --git a/docker/internal/tarfile/reader.go b/docker/internal/tarfile/reader.go index 52f13298ff..d3480eb91f 100644 --- a/docker/internal/tarfile/reader.go +++ b/docker/internal/tarfile/reader.go @@ -17,7 +17,9 @@ import ( // Reader is a ((docker save)-formatted) tar archive that allows random access to any component. type Reader struct { - path string + // None of the fields below are modified after the archive is created, until .Close(); + // this allows concurrent readers of the same archive. + path string // "" if the archive has already been closed. removeOnClose bool // Remove file on close if true Manifest []ManifestItem // Guaranteed to exist after the archive is created. } @@ -119,8 +121,10 @@ func newReader(path string, removeOnClose bool) (*Reader, error) { // Close removes resources associated with an initialized Reader, if any. 
func (r *Reader) Close() error { + path := r.path + r.path = "" // Mark the archive as closed if r.removeOnClose { - return os.Remove(r.path) + return os.Remove(path) } return nil } @@ -138,8 +142,15 @@ func (t *tarReadCloser) Close() error { // openTarComponent returns a ReadCloser for the specific file within the archive. // This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers), // and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough. +// It is safe to call this method from multiple goroutines simultaneously. // The caller should call .Close() on the returned stream. func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) { + // This is only a sanity check; if anyone did concurrently close ra, this access is technically + // racy against the write in .Close(). + if r.path == "" { + return nil, errors.New("Internal error: trying to read an already closed tarfile.Reader") + } + f, err := os.Open(r.path) if err != nil { return nil, err @@ -202,6 +213,7 @@ func findTarComponent(inputFile io.Reader, componentPath string) (*tar.Reader, * } // readTarComponent returns full contents of componentPath. +// It is safe to call this method from multiple goroutines simultaneously. func (r *Reader) readTarComponent(path string, limit int) ([]byte, error) { file, err := r.openTarComponent(path) if err != nil { From be68ec7a523b08d5b2ff1818ea6c81adfe5d00c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 25 Jul 2020 22:08:00 +0200 Subject: [PATCH 33/35] Finally, share a tarfile.Reader across archiveSource objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In archive.Reader, embed a reference to tarfile.Reader to the created ImageReference objects, and use them in NewImageSource. 
Signed-off-by: Miloslav Trmač --- docker/archive/reader.go | 4 ++-- docker/archive/src.go | 17 +++++++++++++---- docker/archive/transport.go | 23 +++++++++++++++-------- docker/archive/transport_test.go | 2 +- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/docker/archive/reader.go b/docker/archive/reader.go index 82ebd9ffa1..7c58487e38 100644 --- a/docker/archive/reader.go +++ b/docker/archive/reader.go @@ -49,14 +49,14 @@ func (r *Reader) List() ([][]types.ImageReference, error) { if !ok { return nil, errors.Errorf("Invalid tag %s (%s): does not contain a tag", tag, parsedTag.String()) } - ref, err := NewReference(r.path, nt) + ref, err := newReference(r.path, nt, -1, r.archive) if err != nil { return nil, errors.Wrapf(err, "Error creating a reference for tag %#v in manifest item @%d", tag, imageIndex) } refs = append(refs, ref) } if len(refs) == 0 { - ref, err := NewIndexReference(r.path, imageIndex) + ref, err := newReference(r.path, nil, imageIndex, r.archive) if err != nil { return nil, errors.Wrapf(err, "Error creating a reference for manifest item @%d", imageIndex) } diff --git a/docker/archive/src.go b/docker/archive/src.go index e850b4b4e4..7acca210ef 100644 --- a/docker/archive/src.go +++ b/docker/archive/src.go @@ -15,11 +15,20 @@ type archiveImageSource struct { // newImageSource returns a types.ImageSource for the specified image reference. // The caller must call .Close() on the returned ImageSource. 
func newImageSource(ctx context.Context, sys *types.SystemContext, ref archiveReference) (types.ImageSource, error) { - archive, err := tarfile.NewReaderFromFile(sys, ref.path) - if err != nil { - return nil, err + var archive *tarfile.Reader + var closeArchive bool + if ref.archiveReader != nil { + archive = ref.archiveReader + closeArchive = false + } else { + a, err := tarfile.NewReaderFromFile(sys, ref.path) + if err != nil { + return nil, err + } + archive = a + closeArchive = true } - src := tarfile.NewSource(archive, true, ref.ref, ref.sourceIndex) + src := tarfile.NewSource(archive, closeArchive, ref.ref, ref.sourceIndex) return &archiveImageSource{ Source: src, ref: ref, diff --git a/docker/archive/transport.go b/docker/archive/transport.go index de30b22f77..384a84df19 100644 --- a/docker/archive/transport.go +++ b/docker/archive/transport.go @@ -6,6 +6,7 @@ import ( "strconv" "strings" + "github.com/containers/image/v5/docker/internal/tarfile" "github.com/containers/image/v5/docker/reference" ctrImage "github.com/containers/image/v5/image" "github.com/containers/image/v5/transports" @@ -48,6 +49,9 @@ type archiveReference struct { // If not -1, a zero-based index of the image in the manifest. Valid only for sources. // Must not be set if ref is set. sourceIndex int + // If not nil, must have been created from path (but archiveReader.path may point at a temporary + // file, not necessarily path precisely). + archiveReader *tarfile.Reader } // ParseReference converts a string, which should not start with the ImageTransport.Name prefix, into an Docker ImageReference. @@ -86,21 +90,23 @@ func ParseReference(refString string) (types.ImageReference, error) { } - return newReference(path, nt, sourceIndex) + return newReference(path, nt, sourceIndex, nil) } // NewReference returns a Docker archive reference for a path and an optional reference. 
func NewReference(path string, ref reference.NamedTagged) (types.ImageReference, error) { - return newReference(path, ref, -1) + return newReference(path, ref, -1, nil) } // NewIndexReference returns a Docker archive reference for a path and a zero-based source manifest index. func NewIndexReference(path string, sourceIndex int) (types.ImageReference, error) { - return newReference(path, nil, sourceIndex) + return newReference(path, nil, sourceIndex, nil) } -// newReference returns a docker archive reference for a path and an optional reference or sourceIndex. -func newReference(path string, ref reference.NamedTagged, sourceIndex int) (types.ImageReference, error) { +// newReference returns a docker archive reference for a path, an optional reference or sourceIndex, +// and optionally a tarfile.Reader matching path. +func newReference(path string, ref reference.NamedTagged, sourceIndex int, + archiveReader *tarfile.Reader) (types.ImageReference, error) { if strings.Contains(path, ":") { return nil, errors.Errorf("Invalid docker-archive: reference: colon in path %q is not supported", path) } @@ -114,9 +120,10 @@ func newReference(path string, ref reference.NamedTagged, sourceIndex int) (type return nil, errors.Errorf("Invalid docker-archive: reference: index @%d must not be negative", sourceIndex) } return archiveReference{ - path: path, - ref: ref, - sourceIndex: sourceIndex, + path: path, + ref: ref, + sourceIndex: sourceIndex, + archiveReader: archiveReader, }, nil } diff --git a/docker/archive/transport_test.go b/docker/archive/transport_test.go index 008a026618..e0328f9c98 100644 --- a/docker/archive/transport_test.go +++ b/docker/archive/transport_test.go @@ -134,7 +134,7 @@ func TestNewReference(t *testing.T) { // Complete coverage testing of the private newReference here as well ntRef := namedTaggedRef(t, "busybox:latest") - _, err = newReference("path", ntRef, 0) + _, err = newReference("path", ntRef, 0, nil) assert.Error(t, err) } From 
820918a114ad4ea1eb9e655c442021667b09b365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 22:22:38 +0200 Subject: [PATCH 34/35] Remove an unused constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- docker/tarfile/types.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docker/tarfile/types.go b/docker/tarfile/types.go index 358d304e5a..0f14389e6d 100644 --- a/docker/tarfile/types.go +++ b/docker/tarfile/types.go @@ -4,12 +4,5 @@ import ( internal "github.com/containers/image/v5/docker/internal/tarfile" ) -// Various data structures. - -// Based on github.com/docker/docker/image/tarexport/tarexport.go -const ( - manifestFileName = "manifest.json" -) - // ManifestItem is an element of the array stored in the top-level manifest.json file. type ManifestItem = internal.ManifestItem // All public members from the internal package remain accessible. From d7c80f4c0d5b2da2966bd494fe4b81feb37e40a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 29 Jul 2020 22:27:06 +0200 Subject: [PATCH 35/35] Move docker/tarfile/src_test.go to docker/internal/tarfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit , closer to the implementation being tested. 
Signed-off-by: Miloslav Trmač --- docker/{ => internal}/tarfile/src_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) rename docker/{ => internal}/tarfile/src_test.go (87%) diff --git a/docker/tarfile/src_test.go b/docker/internal/tarfile/src_test.go similarity index 87% rename from docker/tarfile/src_test.go rename to docker/internal/tarfile/src_test.go index 8b22aa5aff..f578f50c91 100644 --- a/docker/tarfile/src_test.go +++ b/docker/internal/tarfile/src_test.go @@ -26,7 +26,8 @@ func TestSourcePrepareLayerData(t *testing.T) { var tarfileBuffer bytes.Buffer ctx := context.Background() - dest := NewDestinationWithContext(nil, &tarfileBuffer, nil) + writer := NewWriter(&tarfileBuffer) + dest := NewDestination(nil, writer, nil) // No layers configInfo, err := dest.PutBlob(ctx, bytes.NewBufferString(c.config), types.BlobInfo{Size: -1}, cache, true) @@ -40,10 +41,12 @@ func TestSourcePrepareLayerData(t *testing.T) { require.NoError(t, err, c.config) err = dest.PutManifest(ctx, manifest, nil) require.NoError(t, err, c.config) - err = dest.Commit(ctx) + err = writer.Close() require.NoError(t, err, c.config) - src, err := NewSourceFromStreamWithSystemContext(nil, &tarfileBuffer) + reader, err := NewReaderFromStream(nil, &tarfileBuffer) + require.NoError(t, err, c.config) + src := NewSource(reader, true, nil, -1) require.NoError(t, err, c.config) defer src.Close() configStream, _, err := src.GetBlob(ctx, types.BlobInfo{