diff --git a/imagebuildah/chroot_symlink_linux.go b/imagebuildah/chroot_symlink_linux.go index 2fb10ab8323..a6d3b4c8f9b 100644 --- a/imagebuildah/chroot_symlink_linux.go +++ b/imagebuildah/chroot_symlink_linux.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/containers/storage/pkg/reexec" "github.com/pkg/errors" @@ -14,11 +15,13 @@ import ( const ( symlinkChrootedCommand = "chrootsymlinks-resolve" + symlinkModifiedTime = "modtimesymlinks-resolve" maxSymlinksResolved = 40 ) func init() { reexec.Register(symlinkChrootedCommand, resolveChrootedSymlinks) + reexec.Register(symlinkModifiedTime, resolveSymlinkTimeModified) } // resolveSymlink uses a child subprocess to resolve any symlinks in filename @@ -68,6 +71,118 @@ func resolveChrootedSymlinks() { os.Exit(status) } +// main() for grandparent subprocess. Its main job is to shuttle stdio back +// and forth, managing a pseudo-terminal if we want one, for our child, the +// parent subprocess. +func resolveSymlinkTimeModified() { + status := 0 + flag.Parse() + if len(flag.Args()) < 1 { + os.Exit(1) + } + // Our first parameter is the directory to chroot into. + if err := unix.Chdir(flag.Arg(0)); err != nil { + fmt.Fprintf(os.Stderr, "chdir(): %v\n", err) + os.Exit(1) + } + if err := unix.Chroot(flag.Arg(0)); err != nil { + fmt.Fprintf(os.Stderr, "chroot(): %v\n", err) + os.Exit(1) + } + + // Our second parameter is the path name to evaluate for symbolic links. + // Our third parameter is the time the cached intermediate image was created. + // We check whether the modified time of the filepath we provide is after the time the cached image was created. + timeIsGreater, err := modTimeIsGreater(flag.Arg(0), flag.Arg(1), flag.Arg(2)) + if err != nil { + fmt.Fprintf(os.Stderr, "error checking if modified time of resolved symbolic link is greater: %v\n", err) + os.Exit(1) + } + if _, err := os.Stdout.WriteString(fmt.Sprintf("%v", timeIsGreater)); err != nil { + fmt.Fprintf(os.Stderr, "error writing string to stdout: %v\n", err) + os.Exit(1) + } + os.Exit(status) +} + +// resolveModifiedTime (in the grandparent process) checks filename for any symlinks, +// resolves it and compares the modified time of the file with historyTime, which is +// the creation time of the cached image. It returns true if filename was modified after +// historyTime, otherwise returns false. +func resolveModifiedTime(rootdir, filename, historyTime string) (bool, error) { + // The child process expects a chroot and one path that + // will be consulted relative to the chroot directory and evaluated + // for any symbolic links present. + cmd := reexec.Command(symlinkModifiedTime, rootdir, filename, historyTime) + output, err := cmd.CombinedOutput() + if err != nil { + return false, errors.Wrapf(err, string(output)) + } + // Hand back true/false depending on in the file was modified after the caches image was created. + return string(output) == "true", nil +} + +// modTimeIsGreater goes through the files added/copied in using the Dockerfile and +// checks the time stamp (follows symlinks) with the time stamp of when the cached +// image was created. IT compares the two and returns true if the file was modified +// after the cached image was created, otherwise it returns false. +func modTimeIsGreater(rootdir, path string, historyTime string) (bool, error) { + var timeIsGreater bool + + // Convert historyTime from string to time.Time for comparison + histTime, err := time.Parse(time.RFC3339Nano, historyTime) + if err != nil { + return false, errors.Wrapf(err, "error converting string to time.Time %q", historyTime) + } + + // Since we are chroot in rootdir, we want a relative path, i.e (path - rootdir) + relPath, err := filepath.Rel(rootdir, path) + if err != nil { + return false, errors.Wrapf(err, "error making path %q relative to %q", path, rootdir) + } + + // Walk the file tree and check the time stamps. + err = filepath.Walk(relPath, func(path string, info os.FileInfo, err error) error { + // If using cached images, it is possible for files that are being copied to come from + // previous build stages. But if using cached images, then the copied file won't exist + // since a container won't have been created for the previous build stage and info will be nil. + // In that case just return nil and continue on with using the cached image for the whole build process. + if info == nil { + return nil + } + modTime := info.ModTime() + if info.Mode()&os.ModeSymlink == os.ModeSymlink { + // Evaluate any symlink that occurs to get updated modified information + resolvedPath, err := filepath.EvalSymlinks(path) + if err != nil && os.IsNotExist(err) { + return errors.Wrapf(errDanglingSymlink, "%q", path) + } + if err != nil { + return errors.Wrapf(err, "error evaluating symlink %q", path) + } + fileInfo, err := os.Stat(resolvedPath) + if err != nil { + return errors.Wrapf(err, "error getting file info %q", resolvedPath) + } + modTime = fileInfo.ModTime() + } + if modTime.After(histTime) { + timeIsGreater = true + return nil + } + return nil + }) + + if err != nil { + // if error is due to dangling symlink, ignore error and return nil + if errors.Cause(err) == errDanglingSymlink { + return false, nil + } + return false, errors.Wrapf(err, "error walking file tree %q", path) + } + return timeIsGreater, err +} + // getSymbolic link goes through each part of the path and continues resolving symlinks as they appear. // Returns what the whole target path for what "path" resolves to. func getSymbolicLink(path string) (string, error) { diff --git a/imagebuildah/errors.go b/imagebuildah/errors.go new file mode 100644 index 00000000000..cf299656bcd --- /dev/null +++ b/imagebuildah/errors.go @@ -0,0 +1,7 @@ +package imagebuildah + +import "errors" + +var ( + errDanglingSymlink = errors.New("error evaluating dangling symlink") +) diff --git a/imagebuildah/stage_executor.go b/imagebuildah/stage_executor.go index 4d2d7054f06..bc589514d45 100644 --- a/imagebuildah/stage_executor.go +++ b/imagebuildah/stage_executor.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "net/http" "os" "path/filepath" "strconv" @@ -1085,13 +1086,83 @@ func (s *StageExecutor) intermediateImageExists(ctx context.Context, currNode *p } // children + currNode is the point of the Dockerfile we are currently at. if s.executor.historyMatches(baseHistory, currNode, history, addedContentDigest) { - return image.ID, nil + // a COPY or ADD command. + filesMatch, err := s.copiedFilesMatch(currNode, history[len(history)-1].Created) + if err != nil { + return "", errors.Wrapf(err, "error checking if copied files match") + } + if filesMatch { + return image.ID, nil + } } } } return "", nil } +// getFilesToCopy goes through node to get all the src files that are copied, added or downloaded. +// It is possible for the Dockerfile to have src as hom*, which means all files that have hom as a prefix. +// Another format is hom?.txt, which means all files that have that name format with the ? replaced by another character. +func (s *StageExecutor) getFilesToCopy(node *parser.Node) ([]string, error) { + currNode := node.Next + var src []string + for currNode.Next != nil { + if strings.HasPrefix(currNode.Value, "http://") || strings.HasPrefix(currNode.Value, "https://") { + src = append(src, currNode.Value) + currNode = currNode.Next + continue + } + matches, err := filepath.Glob(filepath.Join(s.copyFrom, currNode.Value)) + if err != nil { + return nil, errors.Wrapf(err, "error finding match for pattern %q", currNode.Value) + } + src = append(src, matches...) + currNode = currNode.Next + } + return src, nil +} + +// copiedFilesMatch checks to see if the node instruction is a COPY or ADD. +// If it is either of those two it checks the timestamps on all the files copied/added +// by the dockerfile. If the host version has a time stamp greater than the time stamp +// of the build, the build will not use the cached version and will rebuild. +func (s *StageExecutor) copiedFilesMatch(node *parser.Node, historyTime *time.Time) (bool, error) { + if node.Value != "add" && node.Value != "copy" { + return true, nil + } + + src, err := s.getFilesToCopy(node) + if err != nil { + return false, err + } + for _, item := range src { + // for urls, check the Last-Modified field in the header. + if strings.HasPrefix(item, "http://") || strings.HasPrefix(item, "https://") { + urlContentNew, err := urlContentModified(item, historyTime) + if err != nil { + return false, err + } + if urlContentNew { + return false, nil + } + continue + } + // Walks the file tree for local files and uses chroot to ensure we don't escape out of the allowed path + // when resolving any symlinks. + // Change the time format to ensure we don't run into a parsing error when converting again from string + // to time.Time. It is a known Go issue that the conversions cause errors sometimes, so specifying a particular + // time format here when converting to a string. + timeIsGreater, err := resolveModifiedTime(s.copyFrom, item, historyTime.Format(time.RFC3339Nano)) + if err != nil { + return false, errors.Wrapf(err, "error resolving symlinks and comparing modified times: %q", item) + } + if timeIsGreater { + return false, nil + } + } + return true, nil +} + // commit writes the container's contents to an image, using a passed-in tag as // the name if there is one, generating a unique ID-based one otherwise. func (s *StageExecutor) commit(ctx context.Context, ib *imagebuilder.Builder, createdBy string, emptyLayer bool, output string) (string, reference.Canonical, error) { @@ -1215,3 +1286,23 @@ func (s *StageExecutor) EnsureContainerPath(path string) error { } return nil } + +// urlContentModified sends a get request to the url and checks if the header has a value in +// Last-Modified, and if it does compares the time stamp to that of the history of the cached image. +// returns true if there is no Last-Modified value in the header. +func urlContentModified(url string, historyTime *time.Time) (bool, error) { + resp, err := http.Get(url) + if err != nil { + return false, errors.Wrapf(err, "error getting %q", url) + } + defer resp.Body.Close() + if lastModified := resp.Header.Get("Last-Modified"); lastModified != "" { + lastModifiedTime, err := time.Parse(time.RFC1123, lastModified) + if err != nil { + return false, errors.Wrapf(err, "error parsing time for %q", url) + } + return lastModifiedTime.After(*historyTime), nil + } + logrus.Debugf("Response header did not have Last-Modified %q, will rebuild.", url) + return true, nil +} diff --git a/tests/bud.bats b/tests/bud.bats index 3bd482e7915..7b2abf5f2d8 100644 --- a/tests/bud.bats +++ b/tests/bud.bats @@ -1797,3 +1797,29 @@ load helpers buildah rm --all buildah rmi --all --force } + +@test "bud containerfile with tar archive in copy" { + # First check to verify cache is used if the tar file does not change + target=copy-archive + date>bud/${target}/test; tar cJf - bud/${target}/test > bud/${target}/test.tar.xz + run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target} + expect_output --substring "COMMIT copy-archive" + + run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target} + expect_output --substring " Using cache" + expect_output --substring "COMMIT copy-archive" + + # Now test that we do NOT use cache if the tar file changes + date>bud/${target}/test; tar cJf - bud/${target}/test > bud/${target}/test.tar.xz + run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target} + expect_output --substring "COMMIT copy-archive" + + date>bud/${target}/test; tar cJf - bud/${target}/test > bud/${target}/test.tar.xz + run_buildah bud --signature-policy ${TESTSDIR}/policy.json --layers -t ${target} ${TESTSDIR}/bud/${target} + if [[ "$output" =~ " Using cache" ]]; then + is "$output" "[no instance of 'Using cache']" "no cache used" + fi + expect_output --substring "COMMIT copy-archive" + + rm bud/${target}/test* +}