Skip to content

Commit

Permalink
Cache loaded images for performance improvements. (bazelbuild#1934)
Browse files Browse the repository at this point in the history
Locally on ubuntu 18.04, the join_layers step takes
greater than 30 seconds for the container_bundle_with_install_pkgs
target without this change, and ~5 seconds with this change.

With the previous implementation, join_layers was passing the same
set of images to reader.parts for each call to ReadImage. The reader,
created fresh for each call to ReadImage, would then load these same
images again.

This results in a fixed cost for every image you add to the bundle. For
large bundles, this can be very costly (on the order of minutes).

Updated other callers of compat.ReadImages to use this new API of
creating a reader first, then calling reader.ReadImage.

Co-authored-by: aptenodytes-forsteri <[email protected]>
  • Loading branch information
1 parent 3929701 commit debc18a
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 38 deletions.
3 changes: 2 additions & 1 deletion container/go/cmd/digester/digester.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ func main() {
if err != nil {
log.Fatalf("Unable to determine parts of the image from the specified arguments: %v", err)
}
img, err := compat.ReadImage(imgParts)
r := compat.Reader{Parts: imgParts}
img, err := r.ReadImage()
if err != nil {
log.Fatalf("Error reading image: %v", err)
}
Expand Down
3 changes: 2 additions & 1 deletion container/go/cmd/flattener/flattener.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ func main() {
if err != nil {
log.Fatalf("Unable to determine parts of the image from the specified arguments: %v", err)
}
img, err := compat.ReadImage(imgParts)
r := compat.Reader{Parts: imgParts}
img, err := r.ReadImage()
if err != nil {
log.Fatalf("Error reading image: %v", err)
}
Expand Down
16 changes: 9 additions & 7 deletions container/go/cmd/join_layers/join_layers.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,19 @@ func writeOutput(outputTarball string, tarballFormat string, tagToConfigs, tagTo
if err != nil {
return errors.Wrap(err, "unable to load images from the given tarballs")
}
parts := compat.ImageParts{
Images: images,
Layers: layerParts,
}
r := compat.Reader{Parts: parts}
for tag, configFile := range tagToConfigs {
// Manifest file may not have been specified and this is ok as it's
// only required if the base images has foreign layers.
manifestFile := tagToBaseManifests[tag]
parts := compat.ImageParts{
Config: configFile,
BaseManifest: manifestFile,
Images: images,
Layers: layerParts,
}
img, err := compat.ReadImage(parts)
r.Parts.Config = configFile
r.Parts.BaseManifest = manifestFile

img, err := r.ReadImage()
if err != nil {
return errors.Wrapf(err, "unable to load image %v corresponding to config %s", tag, configFile)
}
Expand Down
3 changes: 2 additions & 1 deletion container/go/cmd/pusher/pusher.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ func main() {
if err != nil {
log.Fatalf("Unable to determine parts of the image from the specified arguments: %v", err)
}
img, err := compat.ReadImage(imgParts)
r := compat.Reader{Parts: imgParts}
img, err := r.ReadImage()
if err != nil {
log.Fatalf("Error reading image: %v", err)
}
Expand Down
68 changes: 40 additions & 28 deletions container/go/pkg/compat/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,11 @@ func ImagePartsFromArgs(config, baseManifest, imgTarball string, layers []string
return result, nil
}

// reader maintains the state necessary to build a legacyImage object from an
// Reader maintains the state necessary to build a legacyImage object from an
// ImageParts object.
type reader struct {
type Reader struct {
// parts is the ImageParts being loaded.
parts ImageParts
Parts ImageParts
// baseManifest is the manifest of the very first base image in the chain
// of images being loaded.
baseManifest *v1.Manifest
Expand All @@ -130,32 +130,35 @@ type reader struct {
// layerLookup is a map from the diffID of a layer to the layer
// itself.
layerLookup map[v1.Hash]v1.Layer
// loadedImageCache is a cache of all images that have been loaded into memory,
// to prevent costly reloads.
loadedImageCache map[v1.Hash]bool
}

// loadMetadata loads the image metadata for the image parts in the given
// reader.
func (r *reader) loadMetadata() error {
cf, err := os.Open(r.parts.Config)
func (r *Reader) loadMetadata() error {
cf, err := os.Open(r.Parts.Config)
if err != nil {
return errors.Wrapf(err, "unable to open image config file %s", r.parts.Config)
return errors.Wrapf(err, "unable to open image config file %s", r.Parts.Config)
}
c, err := v1.ParseConfigFile(cf)
if err != nil {
return errors.Wrapf(err, "unable to parse image config from %s", r.parts.Config)
return errors.Wrapf(err, "unable to parse image config from %s", r.Parts.Config)
}
r.config = c
if r.parts.BaseManifest == "" {
if r.Parts.BaseManifest == "" {
// Base manifest is optional. It's only needed for images whose base
// manifests have foreign layers.
return nil
}
mf, err := os.Open(r.parts.BaseManifest)
mf, err := os.Open(r.Parts.BaseManifest)
if err != nil {
return errors.Wrapf(err, "unable to open base image manifest file %s", r.parts.BaseManifest)
return errors.Wrapf(err, "unable to open base image manifest file %s", r.Parts.BaseManifest)
}
m, err := v1.ParseManifest(mf)
if err != nil {
return errors.Wrapf(err, "unable to parse base image manifest from %s", r.parts.BaseManifest)
return errors.Wrapf(err, "unable to parse base image manifest from %s", r.Parts.BaseManifest)
}
r.baseManifest = m
return nil
Expand Down Expand Up @@ -209,7 +212,7 @@ func (l *foreignLayer) MediaType() (types.MediaType, error) {

// loadForeignLayers loads the foreign layers from the base manifest in the
// given reader into the layer lookup.
func (r *reader) loadForeignLayers() error {
func (r *Reader) loadForeignLayers() error {
if r.baseManifest == nil {
// No base manifest so no foreign layers to load.
return nil
Expand Down Expand Up @@ -237,8 +240,12 @@ func (r *reader) loadForeignLayers() error {

// loadImages loads the layers from the given images into the layers lookup
// in the given reader.
func (r *reader) loadImages(images []v1.Image) error {
func (r *Reader) loadImages(images []v1.Image) error {
for _, img := range images {
digest, _ := img.Digest()
if r.loadedImageCache[digest] {
continue
}
layers, err := img.Layers()
if err != nil {
return errors.Wrap(err, "unable to get the layers in image")
Expand All @@ -250,31 +257,32 @@ func (r *reader) loadImages(images []v1.Image) error {
}
r.layerLookup[diffID] = l
}
r.loadedImageCache[digest] = true
}
return nil
}

// loadImgTarball loads the layers from the image tarball in the parts section
// of the given reader if one was specified into the layers lookup in the given
// reader.
func (r *reader) loadImgTarball() error {
if r.parts.ImageTarball == "" {
func (r *Reader) loadImgTarball() error {
if r.Parts.ImageTarball == "" {
return nil
}
img, err := tarball.ImageFromPath(r.parts.ImageTarball, nil)
img, err := tarball.ImageFromPath(r.Parts.ImageTarball, nil)
if err != nil {
return errors.Wrapf(err, "unable to load image from tarball %s", r.parts.ImageTarball)
return errors.Wrapf(err, "unable to load image from tarball %s", r.Parts.ImageTarball)
}
if err := r.loadImages([]v1.Image{img}); err != nil {
return errors.Wrapf(err, "unable to load the layers from image loaded from tarball %s", r.parts.ImageTarball)
return errors.Wrapf(err, "unable to load the layers from image loaded from tarball %s", r.Parts.ImageTarball)
}
return nil
}

// loadLayers loads layers specified as parts in the ImageParts section in the
// given reader.
func (r *reader) loadLayers() error {
for _, l := range r.parts.Layers {
func (r *Reader) loadLayers() error {
for _, l := range r.Parts.Layers {
layer, err := l.V1Layer()
if err != nil {
return errors.Wrap(err, "unable to build a v1.Layer from the specified parts")
Expand All @@ -289,23 +297,27 @@ func (r *reader) loadLayers() error {
}

// ReadImage loads a v1.Image from the given ImageParts
func ReadImage(parts ImageParts) (v1.Image, error) {
func (r *Reader) ReadImage() (v1.Image, error) {
// Special case: if we only have a tarball, we can instantiate the image
// directly from that. Otherwise, we'll process the image layers
// individually as specified in the config.
if parts.ImageTarball != "" && parts.Config == "" {
return tarball.ImageFromPath(parts.ImageTarball, nil)
if r.Parts.ImageTarball != "" && r.Parts.Config == "" {
return tarball.ImageFromPath(r.Parts.ImageTarball, nil)
}

r := reader{parts: parts}
r.layerLookup = make(map[v1.Hash]v1.Layer)
if r.layerLookup == nil {
r.layerLookup = make(map[v1.Hash]v1.Layer)
}
if r.loadedImageCache == nil {
r.loadedImageCache = make(map[v1.Hash]bool)
}
if err := r.loadMetadata(); err != nil {
return nil, errors.Wrap(err, "unable to load image metadata")
}
if err := r.loadForeignLayers(); err != nil {
return nil, errors.Wrap(err, "unable to load foreign layers specified in the base manifest")
}
if err := r.loadImages(r.parts.Images); err != nil {
if err := r.loadImages(r.Parts.Images); err != nil {
return nil, errors.Wrap(err, "unable to load layers from the images in the given image parts")
}
if err := r.loadImgTarball(); err != nil {
Expand All @@ -318,12 +330,12 @@ func ReadImage(parts ImageParts) (v1.Image, error) {
for _, diffID := range r.config.RootFS.DiffIDs {
layer, ok := r.layerLookup[diffID]
if !ok {
return nil, errors.Errorf("unable to locate layer with diffID %v as indicated in image config %s", diffID, parts.Config)
return nil, errors.Errorf("unable to locate layer with diffID %v as indicated in image config %s", diffID, r.Parts.Config)
}
layers = append(layers, layer)
}
img := &legacyImage{
configPath: parts.Config,
configPath: r.Parts.Config,
layers: layers,
}
if err := img.init(); err != nil {
Expand Down
29 changes: 29 additions & 0 deletions tests/container/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ load(
"@bazel_tools//tools/build_rules:test_rules.bzl",
"file_test",
)
load("@rules_python//python:defs.bzl", "py_test")
load("//container:bundle.bzl", "container_bundle")
load(
"//container:container.bzl",
Expand Down Expand Up @@ -969,3 +970,31 @@ py_test(
srcs_version = "PY3",
deps = ["@containerregistry"],
)

container_image(
name = "test_install_pkgs",
base = "//tests/docker/package_managers:test_install_pkgs.tar",
)

container_image(
name = "test_install_git_for_reproducibility_1",
base = "//tests/docker/package_managers:install_git_for_reproducibility_1.tar",
)

# Test a container bundle with several images and install_pkgs.
# This should not tax join_layers unnecessarily.
# Run bazel build @io_bazel_rules_docker//tests/container:container_bundle_with_install_pkgs.tar to test.
# Prior to adding a caching step to compat/reader.go, join_layers.go
# would take substantially longer for this target.
container_bundle(
name = "container_bundle_with_install_pkgs",
images = {
"install_pkgs_1:latest": ":test_install_pkgs",
"install_pkgs_2:latest": ":test_install_git_for_reproducibility_1",
"localhost:5000/image0:latest": "//testdata:base_with_entrypoint",
"localhost:5000/image1:latest": "//testdata:link_with_files_base",
"localhost:5000/image2:latest": "//testdata:with_double_env",
"localhost:5000/image3:latest": "//testdata:with_label",
"localhost:5000/image4:latest": "//testdata:with_double_label",
},
)

0 comments on commit debc18a

Please sign in to comment.