Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IncludePatterns and ExcludePatterns options for Copy #101

Merged
merged 7 commits into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 219 additions & 27 deletions copy/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import (
"time"

"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/fileutils"
"github.com/pkg/errors"
"github.com/tonistiigi/fsutil/prefix"
)

var bufferPool = &sync.Pool{
Expand Down Expand Up @@ -86,7 +88,10 @@ func Copy(ctx context.Context, srcRoot, src, dstRoot, dst string, opts ...Opt) e
return err
}

c := newCopier(ci.Chown, ci.Utime, ci.Mode, ci.XAttrErrorHandler)
c, err := newCopier(ci.Chown, ci.Utime, ci.Mode, ci.XAttrErrorHandler, ci.IncludePatterns, ci.ExcludePatterns)
if err != nil {
return err
}
srcs := []string{src}

if ci.AllowWildcards {
Expand All @@ -109,7 +114,8 @@ func Copy(ctx context.Context, srcRoot, src, dstRoot, dst string, opts ...Opt) e
if err != nil {
return err
}
if err := c.copy(ctx, srcFollowed, dst, false); err != nil {
includeAll := len(c.includePatterns) == 0
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excludepatterns don't matter in here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

includeAll means we can skip evaluating the include patterns. Usually it's used when a parent dir matches an include pattern. It doesn't skip evaluating exclude patterns.

Maybe skipIncludePatterns would be a better name?

if err := c.copy(ctx, srcFollowed, "", dst, false, includeAll); err != nil {
return err
}
}
Expand Down Expand Up @@ -162,6 +168,10 @@ type CopyInfo struct {
XAttrErrorHandler XAttrErrorHandler
CopyDirContents bool
FollowLinks bool
// Include only files/dirs matching at least one of these patterns
IncludePatterns []string
// Exclude files/dir matching any of these patterns (even if they match an include pattern)
ExcludePatterns []string
}

type Opt func(*CopyInfo)
Expand Down Expand Up @@ -197,48 +207,110 @@ func AllowXAttrErrors(ci *CopyInfo) {
WithXAttrErrorHandler(h)(ci)
}

func WithIncludePattern(includePattern string) Opt {
return func(ci *CopyInfo) {
ci.IncludePatterns = append(ci.IncludePatterns, includePattern)
}
}

func WithExcludePattern(excludePattern string) Opt {
return func(ci *CopyInfo) {
ci.ExcludePatterns = append(ci.ExcludePatterns, excludePattern)
}
}

type copier struct {
chown Chowner
utime *time.Time
mode *int
inodes map[uint64]string
xattrErrorHandler XAttrErrorHandler
chown Chowner
utime *time.Time
mode *int
inodes map[uint64]string
xattrErrorHandler XAttrErrorHandler
includePatterns []string
excludePatternMatcher *fileutils.PatternMatcher
}

func newCopier(chown Chowner, tm *time.Time, mode *int, xeh XAttrErrorHandler) *copier {
func newCopier(chown Chowner, tm *time.Time, mode *int, xeh XAttrErrorHandler, includePatterns, excludePatterns []string) (*copier, error) {
if xeh == nil {
xeh = func(dst, src, key string, err error) error {
return err
}
}
return &copier{inodes: map[uint64]string{}, chown: chown, utime: tm, xattrErrorHandler: xeh, mode: mode}

var pm *fileutils.PatternMatcher
if len(excludePatterns) != 0 {
var err error
pm, err = fileutils.NewPatternMatcher(excludePatterns)
if err != nil {
return nil, errors.Wrapf(err, "invalid excludepatterns: %s", excludePatterns)
}
}

return &copier{
inodes: map[uint64]string{},
chown: chown,
utime: tm,
xattrErrorHandler: xeh,
mode: mode,
includePatterns: includePatterns,
excludePatternMatcher: pm,
}, nil
}

// dest is always clean
func (c *copier) copy(ctx context.Context, src, target string, overwriteTargetMetadata bool) error {
func (c *copier) copy(ctx context.Context, src, srcComponents, target string, overwriteTargetMetadata, includeAll bool) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
}

fi, err := os.Lstat(src)
if err != nil {
return errors.Wrapf(err, "failed to stat %s", src)
}

var include bool
if srcComponents != "" {
if !includeAll {
include, includeAll, err = c.include(srcComponents, fi)
if err != nil {
return err
}
if !include {
return nil
}
}
exclude, err := c.exclude(srcComponents, fi)
if err != nil {
return err
}
if exclude {
return nil
}
}

if !fi.IsDir() {
if include {
if err := c.createParentDirs(src, srcComponents, target, overwriteTargetMetadata); err != nil {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this quite a significant added performance overhead to rerun these checks where most of the time directories exist? If true then I think we need some caching. As everything is sorted shouldn't take too much memory.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does add overhead, but only to the case where an include pattern is matched - so it won't regress performance of any existing cases.

Happy to add caching if you think it's worth the complexity.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is needed. The number of syscalls per file is quite important for copy.

return err
}
}
if err := ensureEmptyFileTarget(target); err != nil {
return err
}
} else if includeAll {
if err := c.createParentDirs(src, srcComponents, target, overwriteTargetMetadata); err != nil {
return err
}
}

copyFileInfo := true

switch {
case fi.IsDir():
if created, err := c.copyDirectory(ctx, src, target, fi, overwriteTargetMetadata); err != nil {
if created, err := c.copyDirectory(ctx, src, srcComponents, target, fi, overwriteTargetMetadata, includeAll); err != nil {
return err
} else if !overwriteTargetMetadata {
} else if !overwriteTargetMetadata || !includeAll {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bit confused about the includeAll check in here as exclude patterns seem to be not affected by this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

created will always be false if includeAll is false (because in that case we delay creation of parent dirs). So this is shorter version of:

if !includeAll {
        // directory may not have been created yet, so don't try to set its metadata
        copyFileInfo = false
}

copyFileInfo = created
}
case (fi.Mode() & os.ModeType) == 0:
Expand Down Expand Up @@ -282,26 +354,127 @@ func (c *copier) copy(ctx context.Context, src, target string, overwriteTargetMe
return nil
}

func (c *copier) copyDirectory(ctx context.Context, src, dst string, stat os.FileInfo, overwriteTargetMetadata bool) (bool, error) {
func (c *copier) include(path string, fi os.FileInfo) (bool, bool, error) {
matched := false
partial := true
for _, pattern := range c.includePatterns {
if fi.IsDir() {
pattern = strings.TrimSuffix(pattern, string(filepath.Separator))
}

if ok, p := prefix.Match(pattern, path); ok {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do IncludePatterns/ExcludePatterns use different format/algorithm? Afaics only ExcludePatterns support !. Is there a difference of ExcludePatterns and potentially doing ! in IncludePatterns.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to match the logic in

if opt != nil {

I think I'm following the same logic (it has separate handling for IncludePatterns and ExcludePatterns and doesn't appear to handle ! in IncludePatterns), but maybe I'm missing something.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I don't remember why it is like this. I guess for .dockerignore is the only use-case atm that required ! and that is the excludes list. IncludePatterns is not used at all anymore as well and FollowPaths is used instead. I guess the FollowPaths case where symlinks need to be followed does not apply here?

I think it is important that something like COPY --filter can be implemented with this. I'm not quite sure if atm client would need to convert some of the patterns to a negative version. But I guess if we would only use one input then with current implementation depending on if user wants to mostly define include or exclude paths one of them would be much slower. Matching with the options of walker is not the most important here so if you think there is a better/simpler logic we can use instead lmk.

matched = true
if !p {
partial = false
break
}
}
}

if !matched {
return false, false, nil
}
if fi.IsDir() {
return true, !partial, nil
}
return !partial, !partial, nil
}

func (c *copier) exclude(path string, fi os.FileInfo) (bool, error) {
if c.excludePatternMatcher == nil {
return false, nil
}

m, err := c.excludePatternMatcher.Matches(path)
if err != nil {
return false, errors.Wrap(err, "failed to match excludepatterns")
}
if m {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit, prefer:

if !m {
    return false, nil
}

dirSlash...

if fi.IsDir() && c.excludePatternMatcher.Exclusions() {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for this PR but this looks non-optimal. I guess ! is rarely used in practice.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modeled on this code from walker.go:

for _, pat := range pm.Patterns() {

dirSlash := path + string(filepath.Separator)
for _, pat := range c.excludePatternMatcher.Patterns() {
if !pat.Exclusion() {
continue
}
patStr := pat.String() + string(filepath.Separator)
if strings.HasPrefix(patStr, dirSlash) {
return false, nil
}
}
}
return true, nil
}

return false, nil
}

// Delayed creation of parent directories when a file or dir matches an include
// pattern.
func (c *copier) createParentDirs(src, srcComponents, target string, overwriteTargetMetadata bool) error {
if len(c.includePatterns) == 0 {
return nil
}

count := strings.Count(srcComponents, string(filepath.Separator))
if count != 0 {
srcPaths := []string{src}
targetPaths := []string{target}
for i := 0; i != count; i++ {
srcParentDir, _ := filepath.Split(srcPaths[len(srcPaths)-1])
if len(srcParentDir) > 1 {
srcParentDir = strings.TrimSuffix(srcParentDir, string(filepath.Separator))
}
srcPaths = append(srcPaths, srcParentDir)

targetParentDir, _ := filepath.Split(targetPaths[len(targetPaths)-1])
if len(targetParentDir) > 1 {
targetParentDir = strings.TrimSuffix(targetParentDir, string(filepath.Separator))
}
targetPaths = append(targetPaths, targetParentDir)
}
for i := count; i > 0; i-- {
fi, err := os.Stat(srcPaths[i])
if err != nil {
return errors.Wrapf(err, "failed to stat %s", src)
}
if !fi.IsDir() {
return errors.Errorf("%s is not a directory", srcPaths[i])
}

created, err := copyDirectoryOnly(srcPaths[i], targetPaths[i], fi, overwriteTargetMetadata)
if err != nil {
return err
}
if created {
if err := c.copyFileInfo(fi, targetPaths[i]); err != nil {
return errors.Wrap(err, "failed to copy file info")
}

if err := copyXAttrs(targetPaths[i], srcPaths[i], c.xattrErrorHandler); err != nil {
return errors.Wrap(err, "failed to copy xattrs")
}
}
}
}
return nil
}

func (c *copier) copyDirectory(ctx context.Context, src, srcComponents, dst string, stat os.FileInfo, overwriteTargetMetadata, includeAll bool) (bool, error) {
if !stat.IsDir() {
return false, errors.Errorf("source is not directory")
}

created := false

if st, err := os.Lstat(dst); err != nil {
if !os.IsNotExist(err) {
return false, err
}
created = true
if err := os.Mkdir(dst, stat.Mode()); err != nil {
return created, errors.Wrapf(err, "failed to mkdir %s", dst)
}
} else if !st.IsDir() {
return false, errors.Errorf("cannot copy to non-directory: %s", dst)
} else if overwriteTargetMetadata {
if err := os.Chmod(dst, stat.Mode()); err != nil {
return false, errors.Wrapf(err, "failed to chmod on %s", dst)
// If there are no include patterns or this directory matched an include
// pattern exactly, go ahead and create the directory. Otherwise, delay to
// handle include patterns like a/*/c where we do not want to create a/b
// until we encounter a/b/c.
if includeAll {
var err error
created, err = copyDirectoryOnly(src, dst, stat, overwriteTargetMetadata)
if err != nil {
return created, err
}
}

Expand All @@ -311,14 +484,33 @@ func (c *copier) copyDirectory(ctx context.Context, src, dst string, stat os.Fil
}

for _, fi := range fis {
if err := c.copy(ctx, filepath.Join(src, fi.Name()), filepath.Join(dst, fi.Name()), true); err != nil {
if err := c.copy(ctx, filepath.Join(src, fi.Name()), filepath.Join(srcComponents, fi.Name()), filepath.Join(dst, fi.Name()), true, includeAll); err != nil {
return false, err
}
}

return created, nil
}

func copyDirectoryOnly(src, dst string, stat os.FileInfo, overwriteTargetMetadata bool) (bool, error) {
if st, err := os.Lstat(dst); err != nil {
if !os.IsNotExist(err) {
return false, err
}
if err := os.Mkdir(dst, stat.Mode()); err != nil {
return false, errors.Wrapf(err, "failed to mkdir %s", dst)
}
return true, nil
} else if !st.IsDir() {
return false, errors.Errorf("cannot copy to non-directory: %s", dst)
} else if overwriteTargetMetadata {
if err := os.Chmod(dst, stat.Mode()); err != nil {
return false, errors.Wrapf(err, "failed to chmod on %s", dst)
}
}
return false, nil
}

func ensureEmptyFileTarget(dst string) error {
fi, err := os.Lstat(dst)
if err != nil {
Expand Down
Loading