Skip to content

Commit

Permalink
Re-Add file check to cacheing algorithm
Browse files Browse the repository at this point in the history
Add back the file checking mechanisms that were removed as part of the
reworking of the cache in #1792.  If we have a 'COPY mytar.tar.xz' command
in the Containerfile and the tar file was regenerated after an intial build,
secondary 'buildah bud' commands would use the cache from the first build
for the tar file instead of regenerating it.

The historyMatches() function (https://github.com/containers/buildah/blob/master/imagebuildah/executor.go#L287)
does not seem to pick up this type of change and I wasn't able to quickly
rework that to do so.  We may want to revert back to this check for now,
then tweak the historyMatches to work properly in a follow up PR.

Regardless, I've added a test as part of this PR to catch this issue
going forward.

Fixes:  #1906

Signed-off-by: TomSweeneyRedHat <[email protected]>
  • Loading branch information
TomSweeneyRedHat committed Oct 16, 2019
1 parent 0f7148b commit 2d7aa9d
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 1 deletion.
115 changes: 115 additions & 0 deletions imagebuildah/chroot_symlink_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"path/filepath"
"strings"
"time"

"github.com/containers/storage/pkg/reexec"
"github.com/pkg/errors"
Expand All @@ -14,11 +15,13 @@ import (

const (
symlinkChrootedCommand = "chrootsymlinks-resolve"
symlinkModifiedTime = "modtimesymlinks-resolve"
maxSymlinksResolved = 40
)

func init() {
reexec.Register(symlinkChrootedCommand, resolveChrootedSymlinks)
reexec.Register(symlinkModifiedTime, resolveSymlinkTimeModified)
}

// resolveSymlink uses a child subprocess to resolve any symlinks in filename
Expand Down Expand Up @@ -68,6 +71,118 @@ func resolveChrootedSymlinks() {
os.Exit(status)
}

// main() for grandparent subprocess. Its main job is to shuttle stdio back
// and forth, managing a pseudo-terminal if we want one, for our child, the
// parent subprocess.
func resolveSymlinkTimeModified() {
status := 0
flag.Parse()
if len(flag.Args()) < 1 {
os.Exit(1)
}
// Our first parameter is the directory to chroot into.
if err := unix.Chdir(flag.Arg(0)); err != nil {
fmt.Fprintf(os.Stderr, "chdir(): %v\n", err)
os.Exit(1)
}
if err := unix.Chroot(flag.Arg(0)); err != nil {
fmt.Fprintf(os.Stderr, "chroot(): %v\n", err)
os.Exit(1)
}

// Our second parameter is the path name to evaluate for symbolic links.
// Our third parameter is the time the cached intermediate image was created.
// We check whether the modified time of the filepath we provide is after the time the cached image was created.
timeIsGreater, err := modTimeIsGreater(flag.Arg(0), flag.Arg(1), flag.Arg(2))
if err != nil {
fmt.Fprintf(os.Stderr, "error checking if modified time of resolved symbolic link is greater: %v\n", err)
os.Exit(1)
}
if _, err := os.Stdout.WriteString(fmt.Sprintf("%v", timeIsGreater)); err != nil {
fmt.Fprintf(os.Stderr, "error writing string to stdout: %v\n", err)
os.Exit(1)
}
os.Exit(status)
}

// resolveModifiedTime (in the grandparent process) checks filename for any symlinks,
// resolves it and compares the modified time of the file with historyTime, which is
// the creation time of the cached image. It returns true if filename was modified after
// historyTime, otherwise returns false.
func resolveModifiedTime(rootdir, filename, historyTime string) (bool, error) {
// The child process expects a chroot and one path that
// will be consulted relative to the chroot directory and evaluated
// for any symbolic links present.
cmd := reexec.Command(symlinkModifiedTime, rootdir, filename, historyTime)
output, err := cmd.CombinedOutput()
if err != nil {
return false, errors.Wrapf(err, string(output))
}
// Hand back true/false depending on in the file was modified after the caches image was created.
return string(output) == "true", nil
}

// modTimeIsGreater goes through the files added/copied in using the Dockerfile and
// checks the time stamp (follows symlinks) with the time stamp of when the cached
// image was created. IT compares the two and returns true if the file was modified
// after the cached image was created, otherwise it returns false.
func modTimeIsGreater(rootdir, path string, historyTime string) (bool, error) {
var timeIsGreater bool

// Convert historyTime from string to time.Time for comparison
histTime, err := time.Parse(time.RFC3339Nano, historyTime)
if err != nil {
return false, errors.Wrapf(err, "error converting string to time.Time %q", historyTime)
}

// Since we are chroot in rootdir, we want a relative path, i.e (path - rootdir)
relPath, err := filepath.Rel(rootdir, path)
if err != nil {
return false, errors.Wrapf(err, "error making path %q relative to %q", path, rootdir)
}

// Walk the file tree and check the time stamps.
err = filepath.Walk(relPath, func(path string, info os.FileInfo, err error) error {
// If using cached images, it is possible for files that are being copied to come from
// previous build stages. But if using cached images, then the copied file won't exist
// since a container won't have been created for the previous build stage and info will be nil.
// In that case just return nil and continue on with using the cached image for the whole build process.
if info == nil {
return nil
}
modTime := info.ModTime()
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
// Evaluate any symlink that occurs to get updated modified information
resolvedPath, err := filepath.EvalSymlinks(path)
if err != nil && os.IsNotExist(err) {
return errors.Wrapf(errDanglingSymlink, "%q", path)
}
if err != nil {
return errors.Wrapf(err, "error evaluating symlink %q", path)
}
fileInfo, err := os.Stat(resolvedPath)
if err != nil {
return errors.Wrapf(err, "error getting file info %q", resolvedPath)
}
modTime = fileInfo.ModTime()
}
if modTime.After(histTime) {
timeIsGreater = true
return nil
}
return nil
})

if err != nil {
// if error is due to dangling symlink, ignore error and return nil
if errors.Cause(err) == errDanglingSymlink {
return false, nil
}
return false, errors.Wrapf(err, "error walking file tree %q", path)
}
return timeIsGreater, err
}

// getSymbolic link goes through each part of the path and continues resolving symlinks as they appear.
// Returns what the whole target path for what "path" resolves to.
func getSymbolicLink(path string) (string, error) {
Expand Down
7 changes: 7 additions & 0 deletions imagebuildah/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package imagebuildah

import "errors"

var (
errDanglingSymlink = errors.New("error evaluating dangling symlink")
)
93 changes: 92 additions & 1 deletion imagebuildah/stage_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -1085,13 +1086,83 @@ func (s *StageExecutor) intermediateImageExists(ctx context.Context, currNode *p
}
// children + currNode is the point of the Dockerfile we are currently at.
if s.executor.historyMatches(baseHistory, currNode, history, addedContentDigest) {
return image.ID, nil
// a COPY or ADD command.
filesMatch, err := s.copiedFilesMatch(currNode, history[len(history)-1].Created)
if err != nil {
return "", errors.Wrapf(err, "error checking if copied files match")
}
if filesMatch {
return image.ID, nil
}
}
}
}
return "", nil
}

// getFilesToCopy goes through node to get all the src files that are copied, added or downloaded.
// It is possible for the Dockerfile to have src as hom*, which means all files that have hom as a prefix.
// Another format is hom?.txt, which means all files that have that name format with the ? replaced by another character.
func (s *StageExecutor) getFilesToCopy(node *parser.Node) ([]string, error) {
currNode := node.Next
var src []string
for currNode.Next != nil {
if strings.HasPrefix(currNode.Value, "http://") || strings.HasPrefix(currNode.Value, "https://") {
src = append(src, currNode.Value)
currNode = currNode.Next
continue
}
matches, err := filepath.Glob(filepath.Join(s.copyFrom, currNode.Value))
if err != nil {
return nil, errors.Wrapf(err, "error finding match for pattern %q", currNode.Value)
}
src = append(src, matches...)
currNode = currNode.Next
}
return src, nil
}

// copiedFilesMatch checks to see if the node instruction is a COPY or ADD.
// If it is either of those two it checks the timestamps on all the files copied/added
// by the dockerfile. If the host version has a time stamp greater than the time stamp
// of the build, the build will not use the cached version and will rebuild.
func (s *StageExecutor) copiedFilesMatch(node *parser.Node, historyTime *time.Time) (bool, error) {
if node.Value != "add" && node.Value != "copy" {
return true, nil
}

src, err := s.getFilesToCopy(node)
if err != nil {
return false, err
}
for _, item := range src {
// for urls, check the Last-Modified field in the header.
if strings.HasPrefix(item, "http://") || strings.HasPrefix(item, "https://") {
urlContentNew, err := urlContentModified(item, historyTime)
if err != nil {
return false, err
}
if urlContentNew {
return false, nil
}
continue
}
// Walks the file tree for local files and uses chroot to ensure we don't escape out of the allowed path
// when resolving any symlinks.
// Change the time format to ensure we don't run into a parsing error when converting again from string
// to time.Time. It is a known Go issue that the conversions cause errors sometimes, so specifying a particular
// time format here when converting to a string.
timeIsGreater, err := resolveModifiedTime(s.copyFrom, item, historyTime.Format(time.RFC3339Nano))
if err != nil {
return false, errors.Wrapf(err, "error resolving symlinks and comparing modified times: %q", item)
}
if timeIsGreater {
return false, nil
}
}
return true, nil
}

// commit writes the container's contents to an image, using a passed-in tag as
// the name if there is one, generating a unique ID-based one otherwise.
func (s *StageExecutor) commit(ctx context.Context, ib *imagebuilder.Builder, createdBy string, emptyLayer bool, output string) (string, reference.Canonical, error) {
Expand Down Expand Up @@ -1215,3 +1286,23 @@ func (s *StageExecutor) EnsureContainerPath(path string) error {
}
return nil
}

// urlContentModified sends a get request to the url and checks if the header has a value in
// Last-Modified, and if it does compares the time stamp to that of the history of the cached image.
// returns true if there is no Last-Modified value in the header.
func urlContentModified(url string, historyTime *time.Time) (bool, error) {
resp, err := http.Get(url)
if err != nil {
return false, errors.Wrapf(err, "error getting %q", url)
}
defer resp.Body.Close()
if lastModified := resp.Header.Get("Last-Modified"); lastModified != "" {
lastModifiedTime, err := time.Parse(time.RFC1123, lastModified)
if err != nil {
return false, errors.Wrapf(err, "error parsing time for %q", url)
}
return lastModifiedTime.After(*historyTime), nil
}
logrus.Debugf("Response header did not have Last-Modified %q, will rebuild.", url)
return true, nil
}
26 changes: 26 additions & 0 deletions tests/bud.bats
Original file line number Diff line number Diff line change
Expand Up @@ -1797,3 +1797,29 @@ load helpers
buildah rm --all
buildah rmi --all --force
}

@test "bud containerfile with tar archive in copy" {
# First check to verify cache is used if the tar file does not change
target=copy-archive
date>bud/${target}/test; tar cJf - bud/${target}/test > bud/${target}/test.tar.xz
run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target}
expect_output --substring "COMMIT copy-archive"

run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target}
expect_output --substring " Using cache"
expect_output --substring "COMMIT copy-archive"

# Now test that we do NOT use cache if the tar file changes
date>bud/${target}/test; tar cJf - bud/${target}/test > bud/${target}/test.tar.xz
run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target}
expect_output --substring "COMMIT copy-archive"

date>bud/${target}/test; tar cJf - bud/${target}/test > bud/${target}/test.tar.xz
run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target}
if [[ "$output" =~ " Using cache" ]]; then
is "$output" "[no instance of 'Using cache']" "no cache used"
fi
expect_output --substring "COMMIT copy-archive"

rm bud/${target}/test*
}

0 comments on commit 2d7aa9d

Please sign in to comment.