Skip to content

Commit

Permalink
Merge pull request #2082 from aaronlehmann/copy-include-exclude
Browse files Browse the repository at this point in the history
Add IncludePatterns and ExcludePatterns options for Copy
  • Loading branch information
tonistiigi authored Jun 1, 2021
2 parents d919be2 + 6f5ea71 commit 8df5671
Show file tree
Hide file tree
Showing 23 changed files with 1,832 additions and 442 deletions.
225 changes: 167 additions & 58 deletions cache/contenthash/checksum.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ import (
"os"
"path"
"path/filepath"
"strings"
"sync"

"github.com/docker/docker/pkg/fileutils"
"github.com/docker/docker/pkg/idtools"
iradix "github.com/hashicorp/go-immutable-radix"
"github.com/hashicorp/golang-lru/simplelru"
Expand Down Expand Up @@ -45,12 +47,15 @@ func getDefaultManager() *cacheManager {
// header, "/dir" is for contents. For the root node "" (empty string) is the
// key for root, "/" for the root header

func Checksum(ctx context.Context, ref cache.ImmutableRef, path string, followLinks bool, s session.Group) (digest.Digest, error) {
return getDefaultManager().Checksum(ctx, ref, path, followLinks, s)
// ChecksumOpts controls how Checksum resolves and filters the requested path.
// When Wildcard is false and both pattern lists are empty, the checksum is
// computed directly via checksumFollow; otherwise the matching entries are
// collected first and combined into a single digest.
type ChecksumOpts struct {
// FollowLinks is forwarded to checksumFollow. On the wildcard/pattern
// path, matched symlink records are re-checksummed through checksumFollow
// when it is set.
FollowLinks bool
// Wildcard makes the requested path be used as a path.Match pattern that
// is matched against the entries of the cache tree.
Wildcard bool
// IncludePatterns, when non-empty, are rooted with keyPath and compiled
// with fileutils.NewPatternMatcher; a candidate that does not match them
// is not included.
IncludePatterns []string
// ExcludePatterns, when non-empty, are rooted with keyPath and compiled
// with fileutils.NewPatternMatcher; a candidate that matches them is not
// included.
ExcludePatterns []string
}

func ChecksumWildcard(ctx context.Context, ref cache.ImmutableRef, path string, followLinks bool, s session.Group) (digest.Digest, error) {
return getDefaultManager().ChecksumWildcard(ctx, ref, path, followLinks, s)
// Checksum returns the digest for path inside ref, delegating to the
// package's default cache manager. opts and s are passed through unchanged.
func Checksum(ctx context.Context, ref cache.ImmutableRef, path string, opts ChecksumOpts, s session.Group) (digest.Digest, error) {
	cm := getDefaultManager()
	return cm.Checksum(ctx, ref, path, opts, s)
}

func GetCacheContext(ctx context.Context, md *metadata.StorageItem, idmap *idtools.IdentityMapping) (CacheContext, error) {
Expand All @@ -66,16 +71,15 @@ func ClearCacheContext(md *metadata.StorageItem) {
}

type CacheContext interface {
Checksum(ctx context.Context, ref cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error)
ChecksumWildcard(ctx context.Context, ref cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error)
Checksum(ctx context.Context, ref cache.Mountable, p string, opts ChecksumOpts, s session.Group) (digest.Digest, error)
HandleChange(kind fsutil.ChangeKind, p string, fi os.FileInfo, err error) error
}

// Hashed is implemented by values that can report a digest.Digest.
type Hashed interface {
// Digest returns the digest associated with the value.
Digest() digest.Digest
}

type Wildcard struct {
// IncludedPath pairs a path selected by includedPaths with the cache record
// found for it.
type IncludedPath struct {
// Path is the slash-separated path converted back from the tree key.
Path string
// Record is the cache record for Path; its Digest is what Checksum
// returns or folds into the combined digest.
Record *CacheRecord
}
Expand All @@ -86,20 +90,12 @@ type cacheManager struct {
lruMu sync.Mutex
}

func (cm *cacheManager) Checksum(ctx context.Context, ref cache.ImmutableRef, p string, followLinks bool, s session.Group) (digest.Digest, error) {
func (cm *cacheManager) Checksum(ctx context.Context, ref cache.ImmutableRef, p string, opts ChecksumOpts, s session.Group) (digest.Digest, error) {
cc, err := cm.GetCacheContext(ctx, ensureOriginMetadata(ref.Metadata()), ref.IdentityMapping())
if err != nil {
return "", nil
}
return cc.Checksum(ctx, ref, p, followLinks, s)
}

func (cm *cacheManager) ChecksumWildcard(ctx context.Context, ref cache.ImmutableRef, p string, followLinks bool, s session.Group) (digest.Digest, error) {
cc, err := cm.GetCacheContext(ctx, ensureOriginMetadata(ref.Metadata()), ref.IdentityMapping())
if err != nil {
return "", nil
}
return cc.ChecksumWildcard(ctx, ref, p, followLinks, s)
return cc.Checksum(ctx, ref, p, opts, s)
}

func (cm *cacheManager) GetCacheContext(ctx context.Context, md *metadata.StorageItem, idmap *idtools.IdentityMapping) (CacheContext, error) {
Expand Down Expand Up @@ -264,12 +260,17 @@ func (cc *cacheContext) save() error {
return cc.md.SetExternal(keyContentHash, dt)
}

// HandleChange notifies the source about a modification operation
func (cc *cacheContext) HandleChange(kind fsutil.ChangeKind, p string, fi os.FileInfo, err error) (retErr error) {
// keyPath normalizes p into the rooted, slash-separated form used for tree
// keys: separators are converted to "/", the path is cleaned and anchored at
// "/", and the root itself is represented by the empty string.
func keyPath(p string) string {
	if rooted := path.Join("/", filepath.ToSlash(p)); rooted != "/" {
		return rooted
	}
	return ""
}

// HandleChange notifies the source about a modification operation
func (cc *cacheContext) HandleChange(kind fsutil.ChangeKind, p string, fi os.FileInfo, err error) (retErr error) {
p = keyPath(p)
k := convertPathToKey([]byte(p))

deleteDir := func(cr *CacheRecord) {
Expand Down Expand Up @@ -382,36 +383,40 @@ func (cc *cacheContext) HandleChange(kind fsutil.ChangeKind, p string, fi os.Fil
return nil
}

func (cc *cacheContext) ChecksumWildcard(ctx context.Context, mountable cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error) {
func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable, p string, opts ChecksumOpts, s session.Group) (digest.Digest, error) {
m := &mount{mountable: mountable, session: s}
defer m.clean()

wildcards, err := cc.wildcards(ctx, m, p)
if !opts.Wildcard && len(opts.IncludePatterns) == 0 && len(opts.ExcludePatterns) == 0 {
return cc.checksumFollow(ctx, m, p, opts.FollowLinks)
}

includedPaths, err := cc.includedPaths(ctx, m, p, opts)
if err != nil {
return "", err
}

if followLinks {
for i, w := range wildcards {
if opts.FollowLinks {
for i, w := range includedPaths {
if w.Record.Type == CacheRecordTypeSymlink {
dgst, err := cc.checksumFollow(ctx, m, w.Path, followLinks)
dgst, err := cc.checksumFollow(ctx, m, w.Path, opts.FollowLinks)
if err != nil {
return "", err
}
wildcards[i].Record = &CacheRecord{Digest: dgst}
includedPaths[i].Record = &CacheRecord{Digest: dgst}
}
}
}
if len(wildcards) == 0 {
if len(includedPaths) == 0 {
return digest.FromBytes([]byte{}), nil
}

if len(wildcards) == 1 && path.Base(p) == path.Base(wildcards[0].Path) {
return wildcards[0].Record.Digest, nil
if len(includedPaths) == 1 && path.Base(p) == path.Base(includedPaths[0].Path) {
return includedPaths[0].Record.Digest, nil
}

digester := digest.Canonical.Digester()
for i, w := range wildcards {
for i, w := range includedPaths {
if i != 0 {
digester.Hash().Write([]byte{0})
}
Expand All @@ -421,13 +426,6 @@ func (cc *cacheContext) ChecksumWildcard(ctx context.Context, mountable cache.Mo
return digester.Digest(), nil
}

func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error) {
m := &mount{mountable: mountable, session: s}
defer m.clean()

return cc.checksumFollow(ctx, m, p, followLinks)
}

func (cc *cacheContext) checksumFollow(ctx context.Context, m *mount, p string, follow bool) (digest.Digest, error) {
const maxSymlinkLimit = 255
i := 0
Expand All @@ -452,7 +450,7 @@ func (cc *cacheContext) checksumFollow(ctx context.Context, m *mount, p string,
}
}

func (cc *cacheContext) wildcards(ctx context.Context, m *mount, p string) ([]*Wildcard, error) {
func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, opts ChecksumOpts) ([]*IncludedPath, error) {
cc.mu.Lock()
defer cc.mu.Unlock()

Expand All @@ -478,32 +476,103 @@ func (cc *cacheContext) wildcards(ctx context.Context, m *mount, p string) ([]*W
}
}()

p = path.Join("/", filepath.ToSlash(p))
if p == "/" {
p = ""
endsInSep := len(p) != 0 && p[len(p)-1] == filepath.Separator
p = keyPath(p)

var includePatternMatcher *fileutils.PatternMatcher
if len(opts.IncludePatterns) != 0 {
rootedIncludePatterns := make([]string, len(opts.IncludePatterns))
for i, includePattern := range opts.IncludePatterns {
rootedIncludePatterns[i] = keyPath(includePattern)
}
includePatternMatcher, err = fileutils.NewPatternMatcher(rootedIncludePatterns)
if err != nil {
return nil, errors.Wrapf(err, "invalid includepatterns: %s", opts.IncludePatterns)
}
}

wildcards := make([]*Wildcard, 0, 2)
var excludePatternMatcher *fileutils.PatternMatcher
if len(opts.ExcludePatterns) != 0 {
rootedExcludePatterns := make([]string, len(opts.ExcludePatterns))
for i, excludePattern := range opts.ExcludePatterns {
rootedExcludePatterns[i] = keyPath(excludePattern)
}
excludePatternMatcher, err = fileutils.NewPatternMatcher(rootedExcludePatterns)
if err != nil {
return nil, errors.Wrapf(err, "invalid excludepatterns: %s", opts.ExcludePatterns)
}
}

includedPaths := make([]*IncludedPath, 0, 2)

txn := cc.tree.Txn()
root = txn.Root()
var updated bool
var (
updated bool
iter *iradix.Seeker
k []byte
kOk bool
)

if opts.Wildcard {
iter = root.Seek([]byte{})
k, _, kOk = iter.Next()
} else {
k = convertPathToKey([]byte(p))
if _, kOk = root.Get(k); kOk {
iter = root.Seek(k)
}
}

iter := root.Seek([]byte{})
for {
k, _, ok := iter.Next()
if !ok {
break
var (
parentDirHeaders []*IncludedPath
lastMatchedDir string
)

for kOk {
fn := string(convertKeyToPath(k))

for len(parentDirHeaders) != 0 {
lastParentDir := parentDirHeaders[len(parentDirHeaders)-1]
if strings.HasPrefix(fn, lastParentDir.Path+"/") {
break
}
parentDirHeaders = parentDirHeaders[:len(parentDirHeaders)-1]
}

dirHeader := false
if len(k) > 0 && k[len(k)-1] == byte(0) {
dirHeader = true
fn = fn[:len(fn)-1]
if fn == p && endsInSep {
// We don't include the metadata header for a source dir which ends with a separator
k, _, kOk = iter.Next()
continue
}
}
if opts.Wildcard {
if lastMatchedDir == "" || !strings.HasPrefix(fn, lastMatchedDir+"/") {
include, err := path.Match(p, fn)
if err != nil {
return nil, err
}
if !include {
k, _, kOk = iter.Next()
continue
}
lastMatchedDir = fn
}
} else if !strings.HasPrefix(fn+"/", p+"/") {
k, _, kOk = iter.Next()
continue
}
fn := convertKeyToPath(k)
b, err := path.Match(p, string(fn))

shouldInclude, err := shouldIncludePath(p, fn, includePatternMatcher, excludePatternMatcher)
if err != nil {
return nil, err
}
if !b {
if !shouldInclude && !dirHeader {
k, _, kOk = iter.Next()
continue
}

Expand All @@ -515,25 +584,65 @@ func (cc *cacheContext) wildcards(ctx context.Context, m *mount, p string) ([]*W
updated = true
}

wildcards = append(wildcards, &Wildcard{Path: string(fn), Record: cr})

if cr.Type == CacheRecordTypeDir {
iter = root.Seek(append(k, 0, 0xff))
// We only hash dir headers and files, not dir contents. Hashing
// dir contents could be wrong if there are exclusions within the
// dir.
shouldInclude = false
}

if !shouldInclude {
if cr.Type == CacheRecordTypeDirHeader {
// We keep track of non-included parent dir headers in case an
// include pattern matches a file inside one of these dirs.
parentDirHeaders = append(parentDirHeaders, &IncludedPath{Path: fn, Record: cr})
}
} else {
includedPaths = append(includedPaths, parentDirHeaders...)
parentDirHeaders = nil
includedPaths = append(includedPaths, &IncludedPath{Path: fn, Record: cr})
}
k, _, kOk = iter.Next()
}

cc.tree = txn.Commit()
cc.dirty = updated

return wildcards, nil
return includedPaths, nil
}

func (cc *cacheContext) checksumNoFollow(ctx context.Context, m *mount, p string) (*CacheRecord, error) {
p = path.Join("/", filepath.ToSlash(p))
if p == "/" {
p = ""
// shouldIncludePath reports whether candidate passes the optional include and
// exclude matchers. A nil matcher is skipped. When an include matcher is
// present the candidate must match it; when an exclude matcher is present the
// candidate must not match it. The candidate is converted to OS-native
// separators before matching. p is accepted but not consulted.
func shouldIncludePath(
	p string,
	candidate string,
	includePatternMatcher *fileutils.PatternMatcher,
	excludePatternMatcher *fileutils.PatternMatcher,
) (bool, error) {
	nativePath := filepath.FromSlash(candidate)

	if includePatternMatcher != nil {
		included, err := includePatternMatcher.Matches(nativePath)
		switch {
		case err != nil:
			return false, errors.Wrap(err, "failed to match includepatterns")
		case !included:
			return false, nil
		}
	}

	if excludePatternMatcher == nil {
		return true, nil
	}

	excluded, err := excludePatternMatcher.Matches(nativePath)
	if err != nil {
		return false, errors.Wrap(err, "failed to match excludepatterns")
	}
	return !excluded, nil
}

func (cc *cacheContext) checksumNoFollow(ctx context.Context, m *mount, p string) (*CacheRecord, error) {
p = keyPath(p)

cc.mu.RLock()
if cc.txn == nil {
root := cc.tree.Root()
Expand Down
Loading

0 comments on commit 8df5671

Please sign in to comment.