From 579006754f7ae37f7e1a545ef807dbcd052912cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Tue, 29 Mar 2022 23:04:38 +0100 Subject: [PATCH] expand: reimplement globstar globbing for correctness The previous globstar implementation was breadth-first search, whereas Bash implements depth-first search. Moreover, when recursing into directories, we could stop early and give an error to the user when encountering a non-directory. Instead, we want to simply not recurse into that path. Add tests for both cases, too. Fixes #829. --- expand/expand.go | 53 ++++++++++++++++++++++++------------------- interp/interp_test.go | 12 ++++++++++ 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/expand/expand.go b/expand/expand.go index 0cc29ab4c..bd809953e 100644 --- a/expand/expand.go +++ b/expand/expand.go @@ -773,6 +773,9 @@ func (cfg *Config) glob(base, pat string) ([]string, error) { // ReadDir("/foo") glob "*" for i, part := range parts { + // Keep around for debugging. + // log.Printf("matches %q part %d %q", matches, i, part) + wantDir := i < len(parts)-1 switch { case part == "", part == ".", part == "..": @@ -816,30 +819,33 @@ func (cfg *Config) glob(base, pat string) ([]string, error) { matches = newMatches continue case part == "**" && cfg.GlobStar: - for i, match := range matches { - // "a/**" should match "a/ a/b a/b/cfg ..."; note - // how the zero-match case has a trailing - // separator. - matches[i] = pathJoin2(match, "") - } - // expand all the possible levels of ** - latest := matches - for { - var newMatches []string - for _, dir := range latest { - var err error - newMatches, err = cfg.globDir(base, dir, rxGlobStar, wantDir, newMatches) - if err != nil { - return nil, err - } + // Find all recursive matches for "**". + // Note that we need the results to be in depth-first order, + // and to avoid recursion, we use a slice as a stack. + // Since we pop from the back, we populate the stack backwards. + stack := make([]string, 0, len(matches)) + for i := len(matches) - 1; i >= 0; i-- { + // "a/**" should match "a/ a/b a/b/cfg ..."; + // note how the zero-match case has a trailing separator. + stack = append(stack, pathJoin2(matches[i], "")) + } + matches = matches[:0] + var newMatches []string // to reuse its capacity + for len(stack) > 0 { + dir := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + // Don't include the original "" match as it's not a valid path. + if dir != "" { + matches = append(matches, dir) } - if len(newMatches) == 0 { - // not another level of directories to - // try; stop - break + + // If dir is not a directory, we keep the stack as-is and continue. + newMatches = newMatches[:0] + newMatches, _ = cfg.globDir(base, dir, rxGlobStar, wantDir, newMatches) + for i := len(newMatches) - 1; i >= 0; i-- { + stack = append(stack, newMatches[i]) } - matches = append(matches, newMatches...) - latest = newMatches } continue } @@ -868,7 +874,8 @@ func (cfg *Config) globDir(base, dir string, rx *regexp.Regexp, wantDir bool, ma } infos, err := cfg.ReadDir(fullDir) if err != nil { - return nil, err + // We still want to return matches, for the sake of reusing slices. + return matches, err } for _, info := range infos { name := info.Name() diff --git a/interp/interp_test.go b/interp/interp_test.go index b9b8b305a..67b50a02c 100644 --- a/interp/interp_test.go +++ b/interp/interp_test.go @@ -2557,6 +2557,18 @@ set +o pipefail "shopt -s globstar; mkdir -p a/b/c; echo **/c | sed 's@\\\\@/@g'", "a/b/c\n", }, + { + "shopt -s globstar; mkdir -p a/b; touch c; echo ** | sed 's@\\\\@/@g'", + "a a/b c\n", + }, + { + "shopt -s globstar; mkdir -p a/b; touch c; echo **/ | sed 's@\\\\@/@g'", + "a/ a/b/\n", + }, + { + "shopt -s globstar; mkdir -p a/b/c a/d; echo ** | sed 's@\\\\@/@g'", + "a a/b a/b/c a/d\n", + }, { "mkdir foo; touch foo/bar; echo */bar */bar/ | sed 's@\\\\@/@g'", "foo/bar */bar/\n",