Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IncludePatterns and ExcludePatterns options for Copy #101

Merged
merged 7 commits into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 220 additions & 27 deletions copy/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import (
"time"

"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/fileutils"
"github.com/pkg/errors"
"github.com/tonistiigi/fsutil/prefix"
)

var bufferPool = &sync.Pool{
Expand Down Expand Up @@ -86,7 +88,10 @@ func Copy(ctx context.Context, srcRoot, src, dstRoot, dst string, opts ...Opt) e
return err
}

c := newCopier(ci.Chown, ci.Utime, ci.Mode, ci.XAttrErrorHandler)
c, err := newCopier(ci.Chown, ci.Utime, ci.Mode, ci.XAttrErrorHandler, ci.IncludePatterns, ci.ExcludePatterns)
if err != nil {
return err
}
srcs := []string{src}

if ci.AllowWildcards {
Expand All @@ -109,7 +114,8 @@ func Copy(ctx context.Context, srcRoot, src, dstRoot, dst string, opts ...Opt) e
if err != nil {
return err
}
if err := c.copy(ctx, srcFollowed, dst, false); err != nil {
skipIncludePatterns := len(c.includePatterns) == 0
if err := c.copy(ctx, srcFollowed, "", dst, false, skipIncludePatterns); err != nil {
return err
}
}
Expand Down Expand Up @@ -162,6 +168,10 @@ type CopyInfo struct {
XAttrErrorHandler XAttrErrorHandler
CopyDirContents bool
FollowLinks bool
// Include only files/dirs matching at least one of these patterns
IncludePatterns []string
// Exclude files/dir matching any of these patterns (even if they match an include pattern)
ExcludePatterns []string
}

type Opt func(*CopyInfo)
Expand Down Expand Up @@ -197,48 +207,117 @@ func AllowXAttrErrors(ci *CopyInfo) {
WithXAttrErrorHandler(h)(ci)
}

func WithIncludePattern(includePattern string) Opt {
return func(ci *CopyInfo) {
ci.IncludePatterns = append(ci.IncludePatterns, includePattern)
}
}

func WithExcludePattern(excludePattern string) Opt {
return func(ci *CopyInfo) {
ci.ExcludePatterns = append(ci.ExcludePatterns, excludePattern)
}
}

type copier struct {
chown Chowner
utime *time.Time
mode *int
inodes map[uint64]string
xattrErrorHandler XAttrErrorHandler
chown Chowner
utime *time.Time
mode *int
inodes map[uint64]string
xattrErrorHandler XAttrErrorHandler
includePatterns []string
excludePatternMatcher *fileutils.PatternMatcher
parentDirs []parentDir
}

type parentDir struct {
srcPath string
dstPath string
copied bool
}

func newCopier(chown Chowner, tm *time.Time, mode *int, xeh XAttrErrorHandler) *copier {
func newCopier(chown Chowner, tm *time.Time, mode *int, xeh XAttrErrorHandler, includePatterns, excludePatterns []string) (*copier, error) {
if xeh == nil {
xeh = func(dst, src, key string, err error) error {
return err
}
}
return &copier{inodes: map[uint64]string{}, chown: chown, utime: tm, xattrErrorHandler: xeh, mode: mode}

var pm *fileutils.PatternMatcher
if len(excludePatterns) != 0 {
var err error
pm, err = fileutils.NewPatternMatcher(excludePatterns)
if err != nil {
return nil, errors.Wrapf(err, "invalid excludepatterns: %s", excludePatterns)
}
}

return &copier{
inodes: map[uint64]string{},
chown: chown,
utime: tm,
xattrErrorHandler: xeh,
mode: mode,
includePatterns: includePatterns,
excludePatternMatcher: pm,
}, nil
}

// dest is always clean
func (c *copier) copy(ctx context.Context, src, target string, overwriteTargetMetadata bool) error {
func (c *copier) copy(ctx context.Context, src, srcComponents, target string, overwriteTargetMetadata, skipIncludePatterns bool) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
}

fi, err := os.Lstat(src)
if err != nil {
return errors.Wrapf(err, "failed to stat %s", src)
}

var include bool
if srcComponents != "" {
if !skipIncludePatterns {
include, skipIncludePatterns, err = c.include(srcComponents, fi)
if err != nil {
return err
}
if !include {
return nil
}
}
exclude, err := c.exclude(srcComponents, fi)
if err != nil {
return err
}
if exclude {
return nil
}
}

if !fi.IsDir() {
if include {
if err := c.createParentDirs(src, srcComponents, target, overwriteTargetMetadata); err != nil {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this quite a significant added performance overhead to rerun these checks where most of the time directories exist? If true then I think we need some caching. As everything is sorted shouldn't take too much memory.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does add overhead, but only to the case where an include pattern is matched - so it won't regress performance of any existing cases.

Happy to add caching if you think it's worth the complexity.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is needed. The number of syscalls per file is quite important for copy.

return err
}
}
if err := ensureEmptyFileTarget(target); err != nil {
return err
}
} else if skipIncludePatterns {
if err := c.createParentDirs(src, srcComponents, target, overwriteTargetMetadata); err != nil {
return err
}
}

copyFileInfo := true

switch {
case fi.IsDir():
if created, err := c.copyDirectory(ctx, src, target, fi, overwriteTargetMetadata); err != nil {
if created, err := c.copyDirectory(ctx, src, srcComponents, target, fi, overwriteTargetMetadata, skipIncludePatterns); err != nil {
return err
} else if !overwriteTargetMetadata {
} else if !overwriteTargetMetadata || !skipIncludePatterns {
copyFileInfo = created
}
case (fi.Mode() & os.ModeType) == 0:
Expand Down Expand Up @@ -282,43 +361,157 @@ func (c *copier) copy(ctx context.Context, src, target string, overwriteTargetMe
return nil
}

func (c *copier) copyDirectory(ctx context.Context, src, dst string, stat os.FileInfo, overwriteTargetMetadata bool) (bool, error) {
func (c *copier) include(path string, fi os.FileInfo) (bool, bool, error) {
matched := false
partial := true
for _, pattern := range c.includePatterns {
if fi.IsDir() {
pattern = strings.TrimSuffix(pattern, string(filepath.Separator))
}

if ok, p := prefix.Match(pattern, path, false); ok {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be simplified:

matched, partial = prefix.Match(pattern, path, false)
if matched && !partial {
    break 
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That could potentially overwrite a true value of matched with false

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah missed that.

matched = true
if !p {
partial = false
break
}
}
}

if !matched {
return false, false, nil
}
if fi.IsDir() {
return true, !partial, nil
}
return !partial, !partial, nil
}

func (c *copier) exclude(path string, fi os.FileInfo) (bool, error) {
if c.excludePatternMatcher == nil {
return false, nil
}

m, err := c.excludePatternMatcher.Matches(path)
if err != nil {
return false, errors.Wrap(err, "failed to match excludepatterns")
}
if m {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit, prefer:

if !m {
    return false, nil
}

dirSlash...

if fi.IsDir() && c.excludePatternMatcher.Exclusions() {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for this PR but this looks non-optimal. I guess ! is rarely used in practice.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modeled on this code from walker.go:

for _, pat := range pm.Patterns() {

dirSlash := path + string(filepath.Separator)
for _, pat := range c.excludePatternMatcher.Patterns() {
if !pat.Exclusion() {
continue
}
patStr := pat.String() + string(filepath.Separator)
if strings.HasPrefix(patStr, dirSlash) {
return false, nil
}
}
}
return true, nil
}

return false, nil
}

// Delayed creation of parent directories when a file or dir matches an include
// pattern.
func (c *copier) createParentDirs(src, srcComponents, target string, overwriteTargetMetadata bool) error {
for i, parentDir := range c.parentDirs {
if parentDir.copied {
continue
}

fi, err := os.Stat(parentDir.srcPath)
if err != nil {
return errors.Wrapf(err, "failed to stat %s", src)
}
if !fi.IsDir() {
return errors.Errorf("%s is not a directory", parentDir.srcPath)
}

created, err := copyDirectoryOnly(parentDir.srcPath, parentDir.dstPath, fi, overwriteTargetMetadata)
if err != nil {
return err
}
if created {
if err := c.copyFileInfo(fi, parentDir.dstPath); err != nil {
return errors.Wrap(err, "failed to copy file info")
}

if err := copyXAttrs(parentDir.dstPath, parentDir.srcPath, c.xattrErrorHandler); err != nil {
return errors.Wrap(err, "failed to copy xattrs")
}
}

c.parentDirs[i].copied = true
}
return nil
}

func (c *copier) copyDirectory(ctx context.Context, src, srcComponents, dst string, stat os.FileInfo, overwriteTargetMetadata, skipIncludePatterns bool) (bool, error) {
if !stat.IsDir() {
return false, errors.Errorf("source is not directory")
}

created := false

if st, err := os.Lstat(dst); err != nil {
if !os.IsNotExist(err) {
return false, err
}
created = true
if err := os.Mkdir(dst, stat.Mode()); err != nil {
return created, errors.Wrapf(err, "failed to mkdir %s", dst)
}
} else if !st.IsDir() {
return false, errors.Errorf("cannot copy to non-directory: %s", dst)
} else if overwriteTargetMetadata {
if err := os.Chmod(dst, stat.Mode()); err != nil {
return false, errors.Wrapf(err, "failed to chmod on %s", dst)
// If there are no include patterns or this directory matched an include
// pattern exactly, go ahead and create the directory. Otherwise, delay to
// handle include patterns like a/*/c where we do not want to create a/b
// until we encounter a/b/c.
if skipIncludePatterns {
var err error
created, err = copyDirectoryOnly(src, dst, stat, overwriteTargetMetadata)
if err != nil {
return created, err
}
}

c.parentDirs = append(c.parentDirs, parentDir{
srcPath: src,
dstPath: dst,
copied: skipIncludePatterns,
})

defer func() {
c.parentDirs = c.parentDirs[:len(c.parentDirs)-1]
}()

fis, err := ioutil.ReadDir(src)
if err != nil {
return false, errors.Wrapf(err, "failed to read %s", src)
}

for _, fi := range fis {
if err := c.copy(ctx, filepath.Join(src, fi.Name()), filepath.Join(dst, fi.Name()), true); err != nil {
if err := c.copy(ctx, filepath.Join(src, fi.Name()), filepath.Join(srcComponents, fi.Name()), filepath.Join(dst, fi.Name()), true, skipIncludePatterns); err != nil {
return false, err
}
}

return created, nil
}

func copyDirectoryOnly(src, dst string, stat os.FileInfo, overwriteTargetMetadata bool) (bool, error) {
if st, err := os.Lstat(dst); err != nil {
if !os.IsNotExist(err) {
return false, err
}
if err := os.Mkdir(dst, stat.Mode()); err != nil {
return false, errors.Wrapf(err, "failed to mkdir %s", dst)
}
return true, nil
} else if !st.IsDir() {
return false, errors.Errorf("cannot copy to non-directory: %s", dst)
} else if overwriteTargetMetadata {
if err := os.Chmod(dst, stat.Mode()); err != nil {
return false, errors.Wrapf(err, "failed to chmod on %s", dst)
}
}
return false, nil
}

func ensureEmptyFileTarget(dst string) error {
fi, err := os.Lstat(dst)
if err != nil {
Expand Down
Loading