diff --git a/core/src/vcs/git-repo.ts b/core/src/vcs/git-repo.ts index 8690c44f1e..d45ea28117 100644 --- a/core/src/vcs/git-repo.ts +++ b/core/src/vcs/git-repo.ts @@ -13,6 +13,8 @@ import fsExtra from "fs-extra" import { pathToCacheContext } from "../cache.js" import { FileTree } from "./file-tree.js" import { normalize, sep } from "path" +import { stableStringify } from "../util/string.js" +import { hashString } from "../util/util.js" const { pathExists } = fsExtra @@ -62,10 +64,7 @@ export class GitRepoHandler extends GitHandler { override async getFiles(params: GetFilesParams): Promise { const { log, path, pathDescription, filter, failOnPrompt = false } = params - if (params.include && params.include.length === 0) { - // No need to proceed, nothing should be included - return [] - } + let scanRoot = params.scanRoot || path if (!(await pathExists(path))) { log.warn(`${pathDescription} ${path} could not be found.`) @@ -77,12 +76,32 @@ export class GitRepoHandler extends GitHandler { return [] } - let scanRoot = params.scanRoot || path - if (!params.scanRoot && params.pathDescription !== "submodule") { scanRoot = await this.getRepoRoot(log, path, failOnPrompt) } + const scanFromProjectRoot = scanRoot === this.garden?.projectRoot + const { augmentedExcludes, augmentedIncludes } = await getIncludeExcludeFiles({ ...params, scanFromProjectRoot }) + + const hashedFilterParams = hashString( + stableStringify({ + filter: filter ? filter.toString() : undefined, // We hash the source code of the filter function if provided. + augmentedIncludes, + augmentedExcludes, + }) + ) + const filteredFilesCacheKey = ["git-repo-files", path, hashedFilterParams] + + const cached = this.cache.get(log, filteredFilesCacheKey) as VcsFile[] | undefined + if (cached) { + return cached + } + + if (params.include && params.include.length === 0) { + // No need to proceed, nothing should be included + return [] + } + const fileTree = await this.scanRepo({ log, path: scanRoot, @@ -96,23 +115,20 @@ export class GitRepoHandler extends GitHandler { // Thus, some excluded files can appear in the resulting fileTree. // It happens because the new "repo" mode does not use `--glob-pathspecs` flag // and does some explicit glob patterns augmentation that misses some edge-cases. - exclude: params.exclude, + // exclude: params.exclude, }) - const moduleFiles = fileTree.getFilesAtPath(path) - - const scanFromProjectRoot = scanRoot === this.garden?.projectRoot - const { augmentedExcludes, augmentedIncludes } = await getIncludeExcludeFiles({ ...params, scanFromProjectRoot }) + const filesAtPath = fileTree.getFilesAtPath(path) log.debug( - `Found ${moduleFiles.length} files in module path, filtering by ${augmentedIncludes.length} include and ${augmentedExcludes.length} exclude globs` + `Found ${filesAtPath.length} files in path, filtering by ${augmentedIncludes.length} include and ${augmentedExcludes.length} exclude globs` ) log.silly(() => `Include globs: ${augmentedIncludes.join(", ")}`) log.silly(() => augmentedExcludes.length > 0 ? `Exclude globs: ${augmentedExcludes.join(", ")}` : "No exclude globs" ) - const filtered = moduleFiles.filter(({ path: p }) => { + const filtered = filesAtPath.filter(({ path: p }) => { if (filter && !filter(p)) { return false } @@ -122,11 +138,15 @@ export class GitRepoHandler extends GitHandler { // but that caused issues with the glob matching on windows due to backslashes const relativePath = p.replace(`${path}${sep}`, "") log.silly(() => `Checking if ${relativePath} matches include/exclude globs`) - return matchPath(relativePath, augmentedIncludes, augmentedExcludes) + const matches = matchPath(relativePath, augmentedIncludes, augmentedExcludes) + log.debug(`rel path: ${relativePath}, matches: ${matches}`) + return matches }) log.debug(`Found ${filtered.length} files in module path after glob matching`) + this.cache.set(log, filteredFilesCacheKey, filtered, pathToCacheContext(path)) + return filtered } @@ -140,7 +160,7 @@ export class GitRepoHandler extends GitHandler { const { log, path } = params const key = ["git-repo-files", path] - let existing = this.cache.get(log, key) as FileTree + let existing = this.cache.get(log, key) as FileTree | undefined if (existing) { params.log.silly(() => `Found cached repository match at ${path}`) diff --git a/core/src/vcs/git.ts b/core/src/vcs/git.ts index 6f228fdc8e..099c7ec5fd 100644 --- a/core/src/vcs/git.ts +++ b/core/src/vcs/git.ts @@ -200,13 +200,12 @@ export class GitHandler extends VcsHandler { } const { log, path, pathDescription = "directory", filter, failOnPrompt = false } = params - const { exclude, hasIncludes, include } = await getIncludeExcludeFiles(params) const gitLog = log .createLog({ name: "git" }) .debug( - `Scanning ${pathDescription} at ${path}\n → Includes: ${include || "(none)"}\n → Excludes: ${ - exclude || "(none)" + `Scanning ${pathDescription} at ${path}\n → Includes: ${params.include || "(none)"}\n → Excludes: ${ + params.exclude || "(none)" }` ) @@ -225,6 +224,7 @@ export class GitHandler extends VcsHandler { throw err } } + const { exclude, hasIncludes, include } = await getIncludeExcludeFiles(params) let files: VcsFile[] = [] diff --git a/core/test/unit/src/vcs/git.ts b/core/test/unit/src/vcs/git.ts index f8d4c648e3..ed4baeb767 100644 --- a/core/test/unit/src/vcs/git.ts +++ b/core/test/unit/src/vcs/git.ts @@ -428,86 +428,113 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { // the exclusion paths with and without glob prefix **/ works in the same way. context("when only exclude filter is specified", () => { context("should filter out files that match the exclude filter", () => { - const excludedFilenameNoExt = "foo" - const excludedFilenameTxt = `${excludedFilenameNoExt}.txt` - const excludedFilenameWildcard = `${excludedFilenameNoExt}.*` + const fooTxt = `foo.txt` + const fooWildcard = `foo.*` + + let fooPath: string + let barPath: string + let dirPath: string + let dirFooPath: string + let dirBarPath: string + const testParams = [ { name: "by exact filename without globs", - exclusionBuilder: () => excludedFilenameTxt, + exclude: () => fooTxt, + expectedFiles: () => (gitScanMode === "repo" ? [barPath, dirBarPath, dirFooPath] : [barPath, dirBarPath]), }, { name: "by exact filename with prefix globs", - exclusionBuilder: () => join("**", excludedFilenameTxt), + exclude: () => join("**", fooTxt), + expectedFiles: () => [barPath, dirBarPath], }, { name: "by filename with wildcard extension without prefix globs", - exclusionBuilder: () => excludedFilenameWildcard, + exclude: () => fooWildcard, + expectedFiles: () => (gitScanMode === "repo" ? [barPath, dirBarPath, dirFooPath] : [barPath, dirBarPath]), }, { name: "by filename with wildcard extension with prefix globs", - exclusionBuilder: () => join("**", excludedFilenameWildcard), + exclude: () => join("**", fooWildcard), + expectedFiles: () => [barPath, dirBarPath], }, ] for (const testParam of testParams) { it(testParam.name, async () => { + fooPath = resolve(tmpPath, "foo.txt") + barPath = resolve(tmpPath, "bar.txt") + // const notExcludedDirName = "dir" + dirPath = resolve(tmpPath, "dir") + dirFooPath = resolve(dirPath, "foo.txt") + dirBarPath = resolve(dirPath, "bar.txt") + // matches file exclusion pattern -> should be excluded - const excludedByFilename = resolve(tmpPath, "foo.txt") - await createFile(excludedByFilename) + // const excludedByFilename = resolve(tmpPath, "foo.txt") + await createFile(fooPath) // doesn't match file exclusion pattern -> should be included - const notExcludedByFilename = resolve(tmpPath, "bar.txt") - await createFile(notExcludedByFilename) + // const notExcludedByFilename = resolve(tmpPath, "bar.txt") + await createFile(barPath) - const notExcludedDirName = "dir" - const notExcludedDirPath = resolve(tmpPath, notExcludedDirName) - await mkdir(notExcludedDirPath) + // const notExcludedDirName = "dir" + // const notExcludedDirPath = resolve(tmpPath, notExcludedDirName) + await mkdir(dirPath) // matches exclusion pattern filename and located in non-excluded dir -> should be excluded - const excludedByFilenameNested = resolve(notExcludedDirPath, "foo.txt") - await createFile(excludedByFilenameNested) + // const excludedByFilenameNested = resolve(notExcludedDirPath, "foo.txt") + await createFile(dirFooPath) // doesn't match file exclusion pattern -> should be included - const notExcludedByFilenameNested = resolve(notExcludedDirPath, "bar.txt") - await createFile(notExcludedByFilenameNested) + // const notExcludedByFilenameNested = resolve(notExcludedDirPath, "bar.txt") + await createFile(dirBarPath) const files = ( await handler.getFiles({ path: tmpPath, scanRoot: undefined, include: undefined, - exclude: [testParam.exclusionBuilder()], + exclude: [testParam.exclude()], log, }) ) .map((p) => p.path) .sort() - const expectedFiles = [notExcludedByFilename, notExcludedByFilenameNested].sort() - expect(files).to.eql(expectedFiles) + expect(files).to.eql(testParam.expectedFiles()) }) } }) context("should filter directories that match the exclude filter", () => { - context("should filter out all files from direct sub-directories that match the exclude filter", () => { + context("should filter out any files from direct sub-directories that match the exclude filter", () => { + let barPath: string + let dirBarPath: string + let excludedDirFooPath: string + let excludedDirBarPath: string const testParams = [ { name: "without globs", exclusionBuilder: (subDirName: string) => subDirName, + expectedFiles: () => [barPath, dirBarPath], }, { name: "with prefix globs", exclusionBuilder: (subDirName: string) => join("**", subDirName), + expectedFiles: () => + gitScanMode === "repo" + ? [barPath, dirBarPath, excludedDirBarPath, excludedDirFooPath] + : [barPath, dirBarPath], }, { name: "with full globs", exclusionBuilder: (subDirName: string) => join("**", subDirName, "**", "*"), + expectedFiles: () => [barPath, dirBarPath], }, { name: "with redundant relative path", exclusionBuilder: (subDirName: string) => `./${subDirName}`, + expectedFiles: () => [barPath, dirBarPath], }, ] @@ -523,26 +550,25 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { for (const testParam of testParams) { it(testParam.name, async () => { // doesn't match file exclusion pattern -> should be included - const notExcludedByFilename = resolve(tmpPath, "bar.txt") + barPath = resolve(tmpPath, "bar.txt") - const notExcludedDirName = "dir" - const notExcludedDirPath = resolve(tmpPath, notExcludedDirName) - await mkdir(notExcludedDirPath) + const dirPath = resolve(tmpPath, "dir") + await mkdir(dirPath) // doesn't match file exclusion pattern in non-excluded dir -> should be included - const notExcludedInNotExcludedDir = resolve(notExcludedDirPath, "bar.txt") + dirBarPath = resolve(dirPath, "bar.txt") // both match directory exclusion pattern -> should be excluded despite the file exclusion pattern matching const excludedDirName = "excluded-dir" const excludedDirPath = resolve(tmpPath, excludedDirName) await mkdir(excludedDirPath) - const excludedByDirectory1 = resolve(excludedDirPath, "foo.txt") - const excludedByDirectory2 = resolve(excludedDirPath, "bar.txt") + excludedDirFooPath = resolve(excludedDirPath, "foo.txt") + excludedDirBarPath = resolve(excludedDirPath, "bar.txt") - await createFile(notExcludedByFilename) - await createFile(notExcludedInNotExcludedDir) - await createFile(excludedByDirectory1) - await createFile(excludedByDirectory2) + await createFile(barPath) + await createFile(dirBarPath) + await createFile(excludedDirFooPath) + await createFile(excludedDirBarPath) const files = ( await handler.getFiles({ @@ -556,29 +582,43 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { .map((p) => p.path) .sort() - const expectedFiles = [notExcludedByFilename, notExcludedInNotExcludedDir].sort() - expect(files).to.eql(expectedFiles) + expect(files).to.eql(testParam.expectedFiles()) }) } }) context("should filter out all files from deep sub-directories that match the exclude filter", () => { + let barPath: string + let dirBarPath: string + let excludedSubdirFooPath: string + let excludedSubdirBarPath: string + const testParams = [ { name: "without globs", exclusionBuilder: (...subDirNames: string[]) => subDirNames.at(-1)!, + expectedFiles: () => + gitScanMode === "repo" + ? [barPath, dirBarPath, excludedSubdirBarPath, excludedSubdirFooPath] + : [barPath, dirBarPath], }, { name: "with prefix globs", exclusionBuilder: (...subDirNames: string[]) => join("**", subDirNames.at(-1)!), + expectedFiles: () => + gitScanMode === "repo" + ? [barPath, dirBarPath, excludedSubdirBarPath, excludedSubdirFooPath] + : [barPath, dirBarPath], }, { name: "with full globs", exclusionBuilder: (...subDirNames: string[]) => join("**", subDirNames.at(-1)!, "**", "*"), + expectedFiles: () => [barPath, dirBarPath], }, { name: "with redundant relative path", exclusionBuilder: (...subDirNames: string[]) => `./${subDirNames.join("/")}`, + expectedFiles: () => [barPath, dirBarPath], }, ] @@ -594,41 +634,38 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { for (const testParam of testParams) { it(testParam.name, async () => { // doesn't match file exclusion pattern -> should be included - const notExcludedByFilename = resolve(tmpPath, "bar.txt") + barPath = resolve(tmpPath, "bar.txt") - const notExcludedDirName = "dir" - const notExcludedDirPath = resolve(tmpPath, notExcludedDirName) - await mkdir(notExcludedDirPath) + const dirPath = resolve(tmpPath, "dir") + await mkdir(dirPath) // doesn't match file exclusion pattern in non-excluded dir -> should be included - const notExcludedInNotExcludedDir = resolve(notExcludedDirPath, "bar.txt") + dirBarPath = resolve(dirPath, "bar.txt") // both match directory exclusion pattern -> should be excluded despite the file exclusion pattern matching - const excludedSubDirectoryName = "excluded-subdir" - const excludedSubDirectoryPath = resolve(notExcludedDirPath, excludedSubDirectoryName) + const excludedSubDirectoryPath = resolve(dirPath, "excluded-subdir") await mkdir(excludedSubDirectoryPath) - const excludedBySubDirectory1 = resolve(excludedSubDirectoryPath, "foo.txt") - const excludedBySubDirectory2 = resolve(excludedSubDirectoryPath, "bar.txt") + excludedSubdirFooPath = resolve(excludedSubDirectoryPath, "foo.txt") + excludedSubdirBarPath = resolve(excludedSubDirectoryPath, "bar.txt") - await createFile(notExcludedByFilename) - await createFile(notExcludedInNotExcludedDir) - await createFile(excludedBySubDirectory1) - await createFile(excludedBySubDirectory2) + await createFile(barPath) + await createFile(dirBarPath) + await createFile(excludedSubdirFooPath) + await createFile(excludedSubdirBarPath) const files = ( await handler.getFiles({ path: tmpPath, scanRoot: undefined, include: undefined, // when include: [], getFiles() always returns an empty result - exclude: [testParam.exclusionBuilder(notExcludedDirName, excludedSubDirectoryName)], + exclude: [testParam.exclusionBuilder("dir", "excluded-subdir")], log, }) ) .map((p) => p.path) .sort() - const expectedFiles = [notExcludedByFilename, notExcludedInNotExcludedDir].sort() - expect(files).to.eql(expectedFiles) + expect(files).to.eql(testParam.expectedFiles()) }) } }) @@ -828,6 +865,63 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { }) }) + if (gitScanMode === "repo") { + context("action at project root that ignores a subdir containing another action", () => { + let fooPath: string + let ignoredFilePath: string + let barPath: string + let dirFooPath: string + let dirSubdirFooPath: string + let dirBarPath: string + let dirIgnoredFilePath: string + it("should respect includes/excludes for both the root-level and nested actions", async () => { + // We don't actually need action configs to test this, but we do need to use the same VCS handler instance + // for both `getFiles` calls to test the caching behavior of the `repo` scan mode. + await mkdir(resolve(tmpPath, "dir")) + fooPath = resolve(tmpPath, "foo.txt") + ignoredFilePath = resolve(tmpPath, "ignored.txt") + barPath = resolve(tmpPath, "bar.txt") + dirFooPath = resolve(tmpPath, "dir", "foo.txt") + dirSubdirFooPath = resolve(tmpPath, "dir", "subdir", "foo.txt") + dirBarPath = resolve(tmpPath, "dir", "bar.txt") + dirIgnoredFilePath = resolve(tmpPath, "dir", "ignored.txt") + + await createFile(fooPath) + await createFile(ignoredFilePath) + await createFile(barPath) + await createFile(dirFooPath) + await createFile(dirSubdirFooPath) + await createFile(dirBarPath) + await createFile(dirIgnoredFilePath) + + const rootFiles = ( + await handler.getFiles({ + path: tmpPath, + scanRoot: undefined, + exclude: ["dir", "ignored.txt"], + log, + }) + ) + .map((f) => f.path) + .sort() + + const dirFiles = ( + await handler.getFiles({ + path: resolve(tmpPath, "dir"), + scanRoot: undefined, + exclude: ["ignored.txt"], + log, + }) + ) + .map((f) => f.path) + .sort() + + expect(rootFiles).to.eql([barPath, fooPath]) + expect(dirFiles).to.eql([dirBarPath, dirFooPath, dirSubdirFooPath]) + }) + }) + } + context("large repo", () => { // TODO: should we track and anyhow validate the execution time here? it("does its thing in a reasonable amount of time", async () => { @@ -1372,7 +1466,7 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { } // FIXME-GITREPOHANDLER: revisit these tests and disk-based configs, -// inspect the scenarios when both include and exclude filters are defined.s +// inspect the scenarios when both include and exclude filters are defined. const getTreeVersionTests = (gitScanMode: GitScanMode) => { const gitHandlerCls = getGitHandlerCls(gitScanMode) describe("getTreeVersion", () => { diff --git a/core/test/unit/src/vcs/vcs.ts b/core/test/unit/src/vcs/vcs.ts index be080c4b44..0f19e1fd22 100644 --- a/core/test/unit/src/vcs/vcs.ts +++ b/core/test/unit/src/vcs/vcs.ts @@ -199,7 +199,7 @@ describe("VcsHandler", () => { expect(result.files).to.eql(["foo"]) }) - it("should not call getFiles is include: [] is set on the module", async () => { + it("should not call getFiles if include: [] is set on the module", async () => { td.replace(handlerA, "getFiles", async () => { throw new Error("Nope!") }) diff --git a/docs/using-garden/actions.md b/docs/using-garden/actions.md index f2d85532e2..7b6e277edb 100644 --- a/docs/using-garden/actions.md +++ b/docs/using-garden/actions.md @@ -190,6 +190,31 @@ is to help users steer away from subtle bugs that can occur when actions uninten actions. See the next section for details on including and excluding files. {% endhint %} +### Differences in include/exclude behavior between the `repo` and `subtree` Git scan modes + +This section is only relevant for users who have set `scan.git.mode` to `subtree` in their project config. + +Garden supports two modes for scanning Git repositories (and any submodules) for files: +* `repo` (the default): Scans entire repositories and then filters down to files matching the paths, includes and +excludes for each action/module. This can be considerably more efficient than the `subtree` mode for large projects +with many actions/modules. +* `subtree` (legacy): This was Garden's scan algoithm before the `repo` scan mode was introduced. This method runs +individual `git` scans on each action/module path. + +When no includes are configured in the module/action config but there are one or more excludes in the config, +the `subtree` mode will interpret all exclude paths as globs. For example, excluding `foo.txt` would also exclude +`dir/foo.txt` (and any path that contains the string `foo.txt`). + +This is because the `subtree` scan mode uses the `--exclude` flag of `git ls-files` under the hood for perfomance +reasons. Note that this only happens when no includes are specified, and only when using the `subtree` scan mode +(which needs to be explicitly set in the project config). + +We're leaving this inconsistency with the older `subtree` mode in place to avoid breaking changes to the Garden configs +of users with older projects whose include/exclude configs rely on the exclude semantics of the `subtree` mode as is. + +The newer `repo` mode doesn't have this inconsistency, since it uses a different method to compute the file list for +an action/module. + ## Actions in the Stack Graph The key concept and the main benefit of having _actions_ instead of modules is that **anything can depend on anything**.