diff --git a/errors.go b/errors.go
index 2a91a8b..d037225 100644
--- a/errors.go
+++ b/errors.go
@@ -14,6 +14,18 @@ var (
 	ErrLstatNotPossible = fmt.Errorf("lstat is not possible")
 	// ErrRelativeTo indicates that we could not make one path relative to another
 	ErrRelativeTo = fmt.Errorf("failed to make path relative to other")
-	// ErrStopWalk indicates to the Walk function that the walk should be aborted
-	ErrStopWalk = fmt.Errorf("stop filesystem walk")
+	// ErrWalk is the base error wrapped by every walk-control error; match the whole family with errors.Is(err, ErrWalk).
+	ErrWalk = fmt.Errorf("walk control")
+	// ErrWalkSkipSubtree indicates to the walk function that the current subtree of
+	// directories should be skipped. It's recommended to only use this error
+	// with the AlgorithmPreOrderDepthFirst algorithm, as many other walk algorithms
+	// will not respect this error due to the nature of the ordering in which the
+	// algorithms visit each node of the filesystem tree.
+	ErrWalkSkipSubtree = fmt.Errorf("skip subtree: %w", ErrWalk)
+	// ErrStopWalk indicates to the Walk function that the walk should be aborted.
+	//
+	// Deprecated: Use ErrWalkStop instead.
+	ErrStopWalk = ErrWalkStop
+	// ErrWalkStop indicates to the Walk function that the walk should be aborted.
+	ErrWalkStop = fmt.Errorf("stop filesystem walk: %w", ErrWalk)
 )
diff --git a/walk.go b/walk.go
index 58f4234..60357a4 100644
--- a/walk.go
+++ b/walk.go
@@ -90,9 +90,14 @@ const (
 	// AlgorithmDepthFirst is a walk algorithm. More specifically, it is a post-order
 	// depth first search whereby subdirectories are recursed into before
 	// visiting the children of the current directory.
+	//
+	// Deprecated: Use AlgorithmPostOrderDepthFirst instead.
 	AlgorithmDepthFirst
-	// AlgorithmPreOrderDepthFirst is a walk algorithm. It visits all of a node's children
-	// before recursing into the subdirectories.
+	// AlgorithmPostOrderDepthFirst is a walk algorithm that recurses into all of its children
+	// before visiting any of a node's elements.
+	AlgorithmPostOrderDepthFirst
+	// AlgorithmPreOrderDepthFirst is a walk algorithm. It visits all of a node's elements
+	// before recursing into its children.
 	AlgorithmPreOrderDepthFirst
 )
 
@@ -205,7 +210,7 @@ func (w *Walk) walkDFS(walkFn WalkFunc, root *Path, currentDepth int) error {
 		// Since we are doing depth-first, we have to first recurse through all the directories,
 		// and save all non-directory objects so we can defer handling at a later time.
 		if IsDir(info.Mode()) {
-			if err := w.walkDFS(walkFn, child, currentDepth+1); err != nil {
+			if err := w.walkDFS(walkFn, child, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
 				return err
 			}
 		}
@@ -316,7 +321,9 @@ func (w *Walk) walkBasic(walkFn WalkFunc, root *Path, currentDepth int) error {
 
 	err := w.iterateImmediateChildren(root, func(child *Path, info os.FileInfo, encounteredErr error) error {
 		if IsDir(info.Mode()) {
-			if err := w.walkBasic(walkFn, child, currentDepth+1); err != nil {
+			// In the case the error is ErrWalkSkipSubtree, we ignore it as we've already
+			// exited from the recursive call. Any other error should be bubbled up.
+			if err := w.walkBasic(walkFn, child, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
 				return err
 			}
 		}
@@ -364,7 +371,7 @@ func (w *Walk) walkPreOrderDFS(walkFn WalkFunc, root *Path, currentDepth int) er
 		return err
 	}
 	for _, dir := range dirs {
-		if err := w.walkPreOrderDFS(walkFn, dir, currentDepth+1); err != nil {
+		if err := w.walkPreOrderDFS(walkFn, dir, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
 			return err
 		}
 	}
@@ -374,12 +381,15 @@ func (w *Walk) walkPreOrderDFS(walkFn WalkFunc, root *Path, currentDepth int) er
 // WalkFunc is the function provided to the Walk function for each directory.
 type WalkFunc func(path *Path, info os.FileInfo, err error) error
 
-// Walk walks the directory using the algorithm specified in the configuration.
+// Walk walks the directory using the algorithm specified in the configuration. Your WalkFunc
+// may return any of the ErrWalk* errors to control various behavior of the walker. See the documentation
+// of each error for more details.
 func (w *Walk) Walk(walkFn WalkFunc) error {
 	funcs := map[Algorithm]func(walkFn WalkFunc, root *Path, currentDepth int) error{
-		AlgorithmBasic:              w.walkBasic,
-		AlgorithmDepthFirst:         w.walkDFS,
-		AlgorithmPreOrderDepthFirst: w.walkPreOrderDFS,
+		AlgorithmBasic:               w.walkBasic,
+		AlgorithmDepthFirst:          w.walkDFS,
+		AlgorithmPostOrderDepthFirst: w.walkDFS,
+		AlgorithmPreOrderDepthFirst:  w.walkPreOrderDFS,
 	}
 	algoFunc, ok := funcs[w.Opts.Algorithm]
 	if !ok {
diff --git a/walk_test.go b/walk_test.go
index 5b2f805..67a9aac 100644
--- a/walk_test.go
+++ b/walk_test.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	os "os"
 	"reflect"
+	"slices"
 	"testing"
 
 	"github.com/spf13/afero"
@@ -296,36 +297,212 @@ func TestNewWalk(t *testing.T) {
 	}
 }
 
-func TestWalkPreOrderDFS(t *testing.T) {
-	root := NewPath(t.TempDir())
-	children := []string{
-		"1.txt",
-		"2.txt",
-		"3.txt",
-		"subdir/4.txt",
-		"subdir/5.txt",
+type FSObject struct {
+	path     *Path
+	contents string
+	dir      bool
+}
+
+func TestWalkerOrder(t *testing.T) {
+	type test struct {
+		name          string
+		algorithm     Algorithm
+		walkOpts      []WalkOptsFunc
+		objects       []FSObject
+		expectedOrder []*Path
+	}
+	for _, tt := range []test{
+		{
+			name:      "Pre-Order DFS simple",
+			algorithm: AlgorithmPreOrderDepthFirst,
+			objects: []FSObject{
+				{path: NewPath("1.txt")},
+				{path: NewPath("2.txt")},
+				{path: NewPath("3.txt")},
+				{path: NewPath("subdir"), dir: true},
+				{path: NewPath("subdir").Join("4.txt")},
+			},
+			walkOpts: []WalkOptsFunc{WalkVisitDirs(true)},
+			expectedOrder: []*Path{
+				NewPath("1.txt"),
+				NewPath("2.txt"),
+				NewPath("3.txt"),
+				NewPath("subdir"),
+				NewPath("subdir").Join("4.txt"),
+			},
+		},
+		{
+			name:      "Post-Order DFS simple",
+			algorithm: AlgorithmDepthFirst,
+			objects: []FSObject{
+				{path: NewPath("1.txt")},
+				{path: NewPath("2.txt")},
+				{path: NewPath("3.txt")},
+				{path: NewPath("subdir"), dir: true},
+				{path: NewPath("subdir").Join("4.txt")},
+			},
+			walkOpts: []WalkOptsFunc{WalkVisitDirs(true)},
+			expectedOrder: []*Path{
+				NewPath("subdir").Join("4.txt"),
+				NewPath("1.txt"),
+				NewPath("2.txt"),
+				NewPath("3.txt"),
+				NewPath("subdir"),
+			},
+		},
+		{
+			name:      "Basic simple",
+			algorithm: AlgorithmBasic,
+			objects: []FSObject{
+				{path: NewPath("1")},
+				{path: NewPath("2"), dir: true},
+				{path: NewPath("2").Join("3")},
+				{path: NewPath("4")},
+			},
+			walkOpts: []WalkOptsFunc{WalkVisitDirs(true)},
+			expectedOrder: []*Path{
+				NewPath("1"),
+				NewPath("2").Join("3"),
+				NewPath("2"),
+				NewPath("4"),
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			root := NewPath(t.TempDir())
+			for _, child := range tt.objects {
+				c := root.JoinPath(child.path)
+				if child.dir {
+					require.NoError(t, c.Mkdir())
+					continue
+				}
+				require.NoError(t, c.WriteFile([]byte(child.contents)))
+			}
+			opts := []WalkOptsFunc{WalkAlgorithm(tt.algorithm), WalkSortChildren(true)}
+			opts = append(opts, tt.walkOpts...)
+			walker, err := NewWalk(root, opts...)
+			require.NoError(t, err)
+
+			actualOrder := []*Path{}
+			require.NoError(
+				t,
+				walker.Walk(func(path *Path, info os.FileInfo, err error) error {
+					require.NoError(t, err)
+					relative, err := path.RelativeTo(root)
+					require.NoError(t, err)
+					actualOrder = append(actualOrder, relative)
+					return nil
+				}),
+			)
+			require.Equal(t, len(tt.expectedOrder), len(actualOrder))
+			for i, path := range tt.expectedOrder {
+				assert.True(t, path.Equals(actualOrder[i]), "incorrect ordering at %d: %s != %s", i, path, actualOrder[i])
+			}
+		})
+	}
+}
+
+// TestErrWalkSkipSubtree tests the behavior of each algorithm when we tell it to skip a subtree.
+func TestErrWalkSkipSubtree(t *testing.T) {
+	type test struct {
+		name      string
+		algorithm Algorithm
+		tree      []*Path
+		skipAt    *Path
+		expected  []*Path
 	}
 
-	for _, child := range children {
-		c := root.Join(child)
-		require.NoError(t, c.Parent().MkdirAll())
-		require.NoError(t, c.WriteFile([]byte("hello")))
+	for _, tt := range []test{
+		{
+			// In AlgorithmBasic, the ordering in which children/nodes are visited
+			// is filesystem and OS dependent. Some filesystems return paths in a lexically-ordered
+			// manner, some return them in the order in which they were created. For this test,
+			// we tell the walker to order the children before iterating over them. That way,
+			// the test will visit "subdir1/subdir2/foo.txt" before "subdir1/subdir2/subdir3/foo.txt",
+			// in which case we would tell the walker to skip the subdir3 subtree before it recursed.
+			"Basic",
+			AlgorithmBasic,
+			nil,
+			NewPath("subdir1").Join("subdir2", "foo.txt"),
+			[]*Path{
+				NewPath("foo1.txt"),
+				NewPath("subdir1").Join("foo.txt"),
+				NewPath("subdir1").Join("subdir2", "foo.txt"),
+			},
+		},
+		{
+			"PreOrderDFS",
+			AlgorithmPreOrderDepthFirst,
+			nil,
+			NewPath("subdir1").Join("subdir2", "foo.txt"),
+			[]*Path{
+				NewPath("foo1.txt"),
+				NewPath("subdir1").Join("foo.txt"),
+				NewPath("subdir1").Join("subdir2", "foo.txt"),
+			},
+		},
+		// Note about the PostOrderDFS case. ErrWalkSkipSubtree effectively
+		// has no meaning to this algorithm because in this case, the algorithm
+		// visits all children before visiting each node. Thus, our WalkFunc has
+		// no opportunity to tell it to skip a particular subtree. This test
+		// serves to ensure this behavior doesn't change.
+		{
+			"PostOrderDFS",
+			AlgorithmPostOrderDepthFirst,
+			nil,
+			NewPath("subdir1").Join("subdir2", "foo.txt"),
+			[]*Path{
+				NewPath("foo1.txt"),
+				NewPath("subdir1").Join("foo.txt"),
+				NewPath("subdir1").Join("subdir2", "foo.txt"),
+				NewPath("subdir1").Join("subdir2", "subdir3", "foo.txt"),
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			root := NewPath(t.TempDir())
+			walker, err := NewWalk(root, WalkAlgorithm(tt.algorithm), WalkVisitDirs(false), WalkSortChildren(true))
+			require.NoError(t, err)
+
+			tree := tt.tree // a case may supply its own tree; nil selects the default fixture below
+			if tree == nil {
+				tree = []*Path{
+					NewPath("foo1.txt"),
+					NewPath("subdir1").Join("foo.txt"),
+					NewPath("subdir1").Join("subdir2", "foo.txt"),
+					NewPath("subdir1").Join("subdir2", "subdir3", "foo.txt"),
+				}
+			}
+			for _, path := range tree {
+				p := root.JoinPath(path)
+				require.NoError(t, p.Parent().MkdirAll())
+				require.NoError(t, p.WriteFile([]byte("")))
+			}
+
+			visited := map[string]struct{}{}
+			require.NoError(t, walker.Walk(func(path *Path, info os.FileInfo, err error) error {
+				t.Logf("visited: %v", path.String())
+				require.NoError(t, err)
+				rel, err := path.RelativeTo(root)
+				require.NoError(t, err)
+				visited[rel.String()] = struct{}{}
+				if rel.Equals(tt.skipAt) {
+					return ErrWalkSkipSubtree
+				}
+				return nil
+			}))
+			visitedSorted := []string{}
+			for key := range visited {
+				visitedSorted = append(visitedSorted, key)
+			}
+			slices.Sort(visitedSorted)
+
+			expected := []string{}
+			for _, path := range tt.expected {
+				expected = append(expected, path.String())
+			}
+			assert.Equal(t, expected, visitedSorted)
+
+		})
 	}
-	walker, err := NewWalk(
-		root,
-		WalkAlgorithm(AlgorithmPreOrderDepthFirst),
-		WalkSortChildren(true),
-		WalkVisitDirs(false),
-	)
-	require.NoError(t, err)
-	seenChildren := []string{}
-	err = walker.Walk(func(path *Path, info os.FileInfo, err error) error {
-		require.NoError(t, err)
-		relative, err := path.RelativeTo(root)
-		require.NoError(t, err)
-		seenChildren = append(seenChildren, relative.String())
-		return nil
-	})
-	require.NoError(t, err)
-	assert.Equal(t, children, seenChildren)
 }