-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add IncludePatterns and ExcludePatterns options for Copy #101
Changes from 1 commit
620ac87
08e5241
e5bab58
b39921c
accc0fa
34d3aed
10412d7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -11,7 +11,9 @@ import ( | |||
"time" | ||||
|
||||
"github.com/containerd/continuity/fs" | ||||
"github.com/docker/docker/pkg/fileutils" | ||||
"github.com/pkg/errors" | ||||
"github.com/tonistiigi/fsutil/prefix" | ||||
) | ||||
|
||||
var bufferPool = &sync.Pool{ | ||||
|
@@ -86,7 +88,10 @@ func Copy(ctx context.Context, srcRoot, src, dstRoot, dst string, opts ...Opt) e | |||
return err | ||||
} | ||||
|
||||
c := newCopier(ci.Chown, ci.Utime, ci.Mode, ci.XAttrErrorHandler) | ||||
c, err := newCopier(ci.Chown, ci.Utime, ci.Mode, ci.XAttrErrorHandler, ci.IncludePatterns, ci.ExcludePatterns) | ||||
if err != nil { | ||||
return err | ||||
} | ||||
srcs := []string{src} | ||||
|
||||
if ci.AllowWildcards { | ||||
|
@@ -109,7 +114,8 @@ func Copy(ctx context.Context, srcRoot, src, dstRoot, dst string, opts ...Opt) e | |||
if err != nil { | ||||
return err | ||||
} | ||||
if err := c.copy(ctx, srcFollowed, dst, false); err != nil { | ||||
includeAll := len(c.includePatterns) == 0 | ||||
if err := c.copy(ctx, srcFollowed, "", dst, false, includeAll); err != nil { | ||||
return err | ||||
} | ||||
} | ||||
|
@@ -162,6 +168,10 @@ type CopyInfo struct { | |||
XAttrErrorHandler XAttrErrorHandler | ||||
CopyDirContents bool | ||||
FollowLinks bool | ||||
// Include only files/dirs matching at least one of these patterns | ||||
IncludePatterns []string | ||||
// Exclude files/dirs matching any of these patterns (even if they match an include pattern) | ||||
ExcludePatterns []string | ||||
} | ||||
|
||||
type Opt func(*CopyInfo) | ||||
|
@@ -197,48 +207,110 @@ func AllowXAttrErrors(ci *CopyInfo) { | |||
WithXAttrErrorHandler(h)(ci) | ||||
} | ||||
|
||||
func WithIncludePattern(includePattern string) Opt { | ||||
return func(ci *CopyInfo) { | ||||
ci.IncludePatterns = append(ci.IncludePatterns, includePattern) | ||||
} | ||||
} | ||||
|
||||
func WithExcludePattern(excludePattern string) Opt { | ||||
return func(ci *CopyInfo) { | ||||
ci.ExcludePatterns = append(ci.ExcludePatterns, excludePattern) | ||||
} | ||||
} | ||||
|
||||
type copier struct { | ||||
chown Chowner | ||||
utime *time.Time | ||||
mode *int | ||||
inodes map[uint64]string | ||||
xattrErrorHandler XAttrErrorHandler | ||||
chown Chowner | ||||
utime *time.Time | ||||
mode *int | ||||
inodes map[uint64]string | ||||
xattrErrorHandler XAttrErrorHandler | ||||
includePatterns []string | ||||
excludePatternMatcher *fileutils.PatternMatcher | ||||
} | ||||
|
||||
func newCopier(chown Chowner, tm *time.Time, mode *int, xeh XAttrErrorHandler) *copier { | ||||
func newCopier(chown Chowner, tm *time.Time, mode *int, xeh XAttrErrorHandler, includePatterns, excludePatterns []string) (*copier, error) { | ||||
if xeh == nil { | ||||
xeh = func(dst, src, key string, err error) error { | ||||
return err | ||||
} | ||||
} | ||||
return &copier{inodes: map[uint64]string{}, chown: chown, utime: tm, xattrErrorHandler: xeh, mode: mode} | ||||
|
||||
var pm *fileutils.PatternMatcher | ||||
if len(excludePatterns) != 0 { | ||||
var err error | ||||
pm, err = fileutils.NewPatternMatcher(excludePatterns) | ||||
if err != nil { | ||||
return nil, errors.Wrapf(err, "invalid excludepatterns: %s", excludePatterns) | ||||
} | ||||
} | ||||
|
||||
return &copier{ | ||||
inodes: map[uint64]string{}, | ||||
chown: chown, | ||||
utime: tm, | ||||
xattrErrorHandler: xeh, | ||||
mode: mode, | ||||
includePatterns: includePatterns, | ||||
excludePatternMatcher: pm, | ||||
}, nil | ||||
} | ||||
|
||||
// dest is always clean | ||||
func (c *copier) copy(ctx context.Context, src, target string, overwriteTargetMetadata bool) error { | ||||
func (c *copier) copy(ctx context.Context, src, srcComponents, target string, overwriteTargetMetadata, includeAll bool) error { | ||||
select { | ||||
case <-ctx.Done(): | ||||
return ctx.Err() | ||||
default: | ||||
} | ||||
|
||||
fi, err := os.Lstat(src) | ||||
if err != nil { | ||||
return errors.Wrapf(err, "failed to stat %s", src) | ||||
} | ||||
|
||||
var include bool | ||||
if srcComponents != "" { | ||||
if !includeAll { | ||||
include, includeAll, err = c.include(srcComponents, fi) | ||||
if err != nil { | ||||
return err | ||||
} | ||||
if !include { | ||||
return nil | ||||
} | ||||
} | ||||
exclude, err := c.exclude(srcComponents, fi) | ||||
if err != nil { | ||||
return err | ||||
} | ||||
if exclude { | ||||
return nil | ||||
} | ||||
} | ||||
|
||||
if !fi.IsDir() { | ||||
if include { | ||||
if err := c.createParentDirs(src, srcComponents, target, overwriteTargetMetadata); err != nil { | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't this quite a significant added performance overhead, rerunning these checks when most of the time the directories already exist? If so, I think we need some caching. As everything is sorted, it shouldn't take too much memory. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This does add overhead, but only in the case where an include pattern is matched, so it won't regress performance of any existing cases. Happy to add caching if you think it's worth the complexity. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is needed. The number of syscalls per file is quite important for copy. |
||||
return err | ||||
} | ||||
} | ||||
if err := ensureEmptyFileTarget(target); err != nil { | ||||
return err | ||||
} | ||||
} else if includeAll { | ||||
if err := c.createParentDirs(src, srcComponents, target, overwriteTargetMetadata); err != nil { | ||||
return err | ||||
} | ||||
} | ||||
|
||||
copyFileInfo := true | ||||
|
||||
switch { | ||||
case fi.IsDir(): | ||||
if created, err := c.copyDirectory(ctx, src, target, fi, overwriteTargetMetadata); err != nil { | ||||
if created, err := c.copyDirectory(ctx, src, srcComponents, target, fi, overwriteTargetMetadata, includeAll); err != nil { | ||||
return err | ||||
} else if !overwriteTargetMetadata { | ||||
} else if !overwriteTargetMetadata || !includeAll { | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A bit confused about the includeAll check here, as exclude patterns do not seem to be affected by this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
if !includeAll {
// directory may not have been created yet, so don't try to set its metadata
copyFileInfo = false
} |
||||
copyFileInfo = created | ||||
} | ||||
case (fi.Mode() & os.ModeType) == 0: | ||||
|
@@ -282,26 +354,127 @@ func (c *copier) copy(ctx context.Context, src, target string, overwriteTargetMe | |||
return nil | ||||
} | ||||
|
||||
func (c *copier) copyDirectory(ctx context.Context, src, dst string, stat os.FileInfo, overwriteTargetMetadata bool) (bool, error) { | ||||
func (c *copier) include(path string, fi os.FileInfo) (bool, bool, error) { | ||||
matched := false | ||||
partial := true | ||||
for _, pattern := range c.includePatterns { | ||||
if fi.IsDir() { | ||||
pattern = strings.TrimSuffix(pattern, string(filepath.Separator)) | ||||
} | ||||
|
||||
if ok, p := prefix.Match(pattern, path); ok { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do IncludePatterns/ExcludePatterns use different format/algorithm? Afaics only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to match the logic in Line 87 in 8599091
I think I'm following the same logic (it has separate handling for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. I don't remember why it is like this. I guess for I think it is important that something like |
||||
matched = true | ||||
if !p { | ||||
partial = false | ||||
break | ||||
} | ||||
} | ||||
} | ||||
|
||||
if !matched { | ||||
return false, false, nil | ||||
} | ||||
if fi.IsDir() { | ||||
return true, !partial, nil | ||||
} | ||||
return !partial, !partial, nil | ||||
} | ||||
|
||||
func (c *copier) exclude(path string, fi os.FileInfo) (bool, error) { | ||||
if c.excludePatternMatcher == nil { | ||||
return false, nil | ||||
} | ||||
|
||||
m, err := c.excludePatternMatcher.Matches(path) | ||||
if err != nil { | ||||
return false, errors.Wrap(err, "failed to match excludepatterns") | ||||
} | ||||
if m { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit, prefer: if !m {
return false, nil
}
dirSlash... |
||||
if fi.IsDir() && c.excludePatternMatcher.Exclusions() { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not for this PR but this looks non-optimal. I guess There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Modeled on this code from Line 131 in 8599091
|
||||
dirSlash := path + string(filepath.Separator) | ||||
for _, pat := range c.excludePatternMatcher.Patterns() { | ||||
if !pat.Exclusion() { | ||||
continue | ||||
} | ||||
patStr := pat.String() + string(filepath.Separator) | ||||
if strings.HasPrefix(patStr, dirSlash) { | ||||
return false, nil | ||||
} | ||||
} | ||||
} | ||||
return true, nil | ||||
} | ||||
|
||||
return false, nil | ||||
} | ||||
|
||||
// Delayed creation of parent directories when a file or dir matches an include | ||||
// pattern. | ||||
func (c *copier) createParentDirs(src, srcComponents, target string, overwriteTargetMetadata bool) error { | ||||
if len(c.includePatterns) == 0 { | ||||
return nil | ||||
} | ||||
|
||||
count := strings.Count(srcComponents, string(filepath.Separator)) | ||||
if count != 0 { | ||||
srcPaths := []string{src} | ||||
targetPaths := []string{target} | ||||
for i := 0; i != count; i++ { | ||||
srcParentDir, _ := filepath.Split(srcPaths[len(srcPaths)-1]) | ||||
if len(srcParentDir) > 1 { | ||||
srcParentDir = strings.TrimSuffix(srcParentDir, string(filepath.Separator)) | ||||
} | ||||
srcPaths = append(srcPaths, srcParentDir) | ||||
|
||||
targetParentDir, _ := filepath.Split(targetPaths[len(targetPaths)-1]) | ||||
if len(targetParentDir) > 1 { | ||||
targetParentDir = strings.TrimSuffix(targetParentDir, string(filepath.Separator)) | ||||
} | ||||
targetPaths = append(targetPaths, targetParentDir) | ||||
} | ||||
for i := count; i > 0; i-- { | ||||
fi, err := os.Stat(srcPaths[i]) | ||||
if err != nil { | ||||
return errors.Wrapf(err, "failed to stat %s", src) | ||||
} | ||||
if !fi.IsDir() { | ||||
return errors.Errorf("%s is not a directory", srcPaths[i]) | ||||
} | ||||
|
||||
created, err := copyDirectoryOnly(srcPaths[i], targetPaths[i], fi, overwriteTargetMetadata) | ||||
if err != nil { | ||||
return err | ||||
} | ||||
if created { | ||||
if err := c.copyFileInfo(fi, targetPaths[i]); err != nil { | ||||
return errors.Wrap(err, "failed to copy file info") | ||||
} | ||||
|
||||
if err := copyXAttrs(targetPaths[i], srcPaths[i], c.xattrErrorHandler); err != nil { | ||||
return errors.Wrap(err, "failed to copy xattrs") | ||||
} | ||||
} | ||||
} | ||||
} | ||||
return nil | ||||
} | ||||
|
||||
func (c *copier) copyDirectory(ctx context.Context, src, srcComponents, dst string, stat os.FileInfo, overwriteTargetMetadata, includeAll bool) (bool, error) { | ||||
if !stat.IsDir() { | ||||
return false, errors.Errorf("source is not directory") | ||||
} | ||||
|
||||
created := false | ||||
|
||||
if st, err := os.Lstat(dst); err != nil { | ||||
if !os.IsNotExist(err) { | ||||
return false, err | ||||
} | ||||
created = true | ||||
if err := os.Mkdir(dst, stat.Mode()); err != nil { | ||||
return created, errors.Wrapf(err, "failed to mkdir %s", dst) | ||||
} | ||||
} else if !st.IsDir() { | ||||
return false, errors.Errorf("cannot copy to non-directory: %s", dst) | ||||
} else if overwriteTargetMetadata { | ||||
if err := os.Chmod(dst, stat.Mode()); err != nil { | ||||
return false, errors.Wrapf(err, "failed to chmod on %s", dst) | ||||
// If there are no include patterns or this directory matched an include | ||||
// pattern exactly, go ahead and create the directory. Otherwise, delay to | ||||
// handle include patterns like a/*/c where we do not want to create a/b | ||||
// until we encounter a/b/c. | ||||
if includeAll { | ||||
var err error | ||||
created, err = copyDirectoryOnly(src, dst, stat, overwriteTargetMetadata) | ||||
if err != nil { | ||||
return created, err | ||||
} | ||||
} | ||||
|
||||
|
@@ -311,14 +484,33 @@ func (c *copier) copyDirectory(ctx context.Context, src, dst string, stat os.Fil | |||
} | ||||
|
||||
for _, fi := range fis { | ||||
if err := c.copy(ctx, filepath.Join(src, fi.Name()), filepath.Join(dst, fi.Name()), true); err != nil { | ||||
if err := c.copy(ctx, filepath.Join(src, fi.Name()), filepath.Join(srcComponents, fi.Name()), filepath.Join(dst, fi.Name()), true, includeAll); err != nil { | ||||
return false, err | ||||
} | ||||
} | ||||
|
||||
return created, nil | ||||
} | ||||
|
||||
func copyDirectoryOnly(src, dst string, stat os.FileInfo, overwriteTargetMetadata bool) (bool, error) { | ||||
if st, err := os.Lstat(dst); err != nil { | ||||
if !os.IsNotExist(err) { | ||||
return false, err | ||||
} | ||||
if err := os.Mkdir(dst, stat.Mode()); err != nil { | ||||
return false, errors.Wrapf(err, "failed to mkdir %s", dst) | ||||
} | ||||
return true, nil | ||||
} else if !st.IsDir() { | ||||
return false, errors.Errorf("cannot copy to non-directory: %s", dst) | ||||
} else if overwriteTargetMetadata { | ||||
if err := os.Chmod(dst, stat.Mode()); err != nil { | ||||
return false, errors.Wrapf(err, "failed to chmod on %s", dst) | ||||
} | ||||
} | ||||
return false, nil | ||||
} | ||||
|
||||
func ensureEmptyFileTarget(dst string) error { | ||||
fi, err := os.Lstat(dst) | ||||
if err != nil { | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Exclude patterns don't matter in here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
includeAll
means we can skip evaluating the include patterns. Usually it's used when a parent dir matches an include pattern. It doesn't skip evaluating exclude patterns. Maybe
skipIncludePatterns
would be a better name?