From 892719ae5db393a03636f5e9e36351982d5c6950 Mon Sep 17 00:00:00 2001 From: Thorarinn Sigurdsson Date: Tue, 6 Feb 2024 10:27:54 +0100 Subject: [PATCH] fix(git): fix file list caching bug for repo mode This fixes an error that would come up e.g. when an action config in the project root excluded a subdirectory containing another action config. In that setup, the file list for the action in the subdirectory would be empty because the file list had been cached using the exclude config applied when computing the file list for the action config at the root level (which included no files for the excluded subdirectory). This was fixed by adding another level of caching to `GitRepoHandler` that accounts for different includes/excludes and filter functions for the same path. --- core/src/vcs/git-repo.ts | 53 ++++++---- core/src/vcs/git.ts | 6 +- core/test/unit/src/vcs/git.ts | 193 ++++++++++++++++++++++++---------- core/test/unit/src/vcs/vcs.ts | 2 +- docs/using-garden/actions.md | 25 +++++ 5 files changed, 200 insertions(+), 79 deletions(-) diff --git a/core/src/vcs/git-repo.ts b/core/src/vcs/git-repo.ts index 8690c44f1e..6eea2c30da 100644 --- a/core/src/vcs/git-repo.ts +++ b/core/src/vcs/git-repo.ts @@ -13,6 +13,8 @@ import fsExtra from "fs-extra" import { pathToCacheContext } from "../cache.js" import { FileTree } from "./file-tree.js" import { normalize, sep } from "path" +import { stableStringify } from "../util/string.js" +import { hashString } from "../util/util.js" const { pathExists } = fsExtra @@ -62,10 +64,7 @@ export class GitRepoHandler extends GitHandler { override async getFiles(params: GetFilesParams): Promise { const { log, path, pathDescription, filter, failOnPrompt = false } = params - if (params.include && params.include.length === 0) { - // No need to proceed, nothing should be included - return [] - } + let scanRoot = params.scanRoot || path if (!(await pathExists(path))) { log.warn(`${pathDescription} ${path} could not be found.`) @@ -77,42 +76,50 @@ export class GitRepoHandler extends GitHandler { return [] } - let scanRoot = params.scanRoot || path - if (!params.scanRoot && params.pathDescription !== "submodule") { scanRoot = await this.getRepoRoot(log, path, failOnPrompt) } + const scanFromProjectRoot = scanRoot === this.garden?.projectRoot + const { augmentedExcludes, augmentedIncludes } = await getIncludeExcludeFiles({ ...params, scanFromProjectRoot }) + + const hashedFilterParams = hashString( + stableStringify({ + filter: filter ? filter.toString() : undefined, // We hash the source code of the filter function if provided. + augmentedIncludes, + augmentedExcludes, + }) + ) + const filteredFilesCacheKey = ["git-repo-files", path, hashedFilterParams] + + const cached = this.cache.get(log, filteredFilesCacheKey) as VcsFile[] | undefined + if (cached) { + return cached + } + + if (params.include && params.include.length === 0) { + // No need to proceed, nothing should be included + return [] + } + const fileTree = await this.scanRepo({ log, path: scanRoot, pathDescription: pathDescription || "repository", failOnPrompt, - // This method delegates to the old "subtree" Git scan mode that use `--exclude` and `--glob-pathspecs`. - // We need to pass the exclude-filter to ensure that all excluded files and dirs are excluded properly, - // i.e. with the properly expanded globs. - // Otherwise, the "repo" Git scan mode may return some excluded files - // which should be skipped by design and are skipped by the "subtree" mode. - // Thus, some excluded files can appear in the resulting fileTree. - // It happens because the new "repo" mode does not use `--glob-pathspecs` flag - // and does some explicit glob patterns augmentation that misses some edge-cases. - exclude: params.exclude, }) - const moduleFiles = fileTree.getFilesAtPath(path) - - const scanFromProjectRoot = scanRoot === this.garden?.projectRoot - const { augmentedExcludes, augmentedIncludes } = await getIncludeExcludeFiles({ ...params, scanFromProjectRoot }) + const filesAtPath = fileTree.getFilesAtPath(path) log.debug( - `Found ${moduleFiles.length} files in module path, filtering by ${augmentedIncludes.length} include and ${augmentedExcludes.length} exclude globs` + `Found ${filesAtPath.length} files in path, filtering by ${augmentedIncludes.length} include and ${augmentedExcludes.length} exclude globs` ) log.silly(() => `Include globs: ${augmentedIncludes.join(", ")}`) log.silly(() => augmentedExcludes.length > 0 ? `Exclude globs: ${augmentedExcludes.join(", ")}` : "No exclude globs" ) - const filtered = moduleFiles.filter(({ path: p }) => { + const filtered = filesAtPath.filter(({ path: p }) => { if (filter && !filter(p)) { return false } @@ -127,6 +134,8 @@ export class GitRepoHandler extends GitHandler { log.debug(`Found ${filtered.length} files in module path after glob matching`) + this.cache.set(log, filteredFilesCacheKey, filtered, pathToCacheContext(path)) + return filtered } @@ -140,7 +149,7 @@ export class GitRepoHandler extends GitHandler { const { log, path } = params const key = ["git-repo-files", path] - let existing = this.cache.get(log, key) as FileTree + let existing = this.cache.get(log, key) as FileTree | undefined if (existing) { params.log.silly(() => `Found cached repository match at ${path}`) diff --git a/core/src/vcs/git.ts b/core/src/vcs/git.ts index 6f228fdc8e..099c7ec5fd 100644 --- a/core/src/vcs/git.ts +++ b/core/src/vcs/git.ts @@ -200,13 +200,12 @@ export class GitHandler extends VcsHandler { } const { log, path, pathDescription = "directory", filter, failOnPrompt = false } = params - const { exclude, hasIncludes, include } = await getIncludeExcludeFiles(params) const gitLog = log .createLog({ name: "git" }) .debug( - `Scanning ${pathDescription} at ${path}\n → Includes: ${include || "(none)"}\n → Excludes: ${ - exclude || "(none)" + `Scanning ${pathDescription} at ${path}\n → Includes: ${params.include || "(none)"}\n → Excludes: ${ + params.exclude || "(none)" }` ) @@ -225,6 +224,7 @@ export class GitHandler extends VcsHandler { throw err } } + const { exclude, hasIncludes, include } = await getIncludeExcludeFiles(params) let files: VcsFile[] = [] diff --git a/core/test/unit/src/vcs/git.ts b/core/test/unit/src/vcs/git.ts index f8d4c648e3..4a2e27686c 100644 --- a/core/test/unit/src/vcs/git.ts +++ b/core/test/unit/src/vcs/git.ts @@ -428,86 +428,106 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { // the exclusion paths with and without glob prefix **/ works in the same way. context("when only exclude filter is specified", () => { context("should filter out files that match the exclude filter", () => { - const excludedFilenameNoExt = "foo" - const excludedFilenameTxt = `${excludedFilenameNoExt}.txt` - const excludedFilenameWildcard = `${excludedFilenameNoExt}.*` + const fooTxt = `foo.txt` + const fooWildcard = `foo.*` + + let fooPath: string + let barPath: string + let dirPath: string + let dirFooPath: string + let dirBarPath: string + const testParams = [ { name: "by exact filename without globs", - exclusionBuilder: () => excludedFilenameTxt, + exclude: () => fooTxt, + expectedFiles: () => (gitScanMode === "repo" ? [barPath, dirBarPath, dirFooPath] : [barPath, dirBarPath]), }, { name: "by exact filename with prefix globs", - exclusionBuilder: () => join("**", excludedFilenameTxt), + exclude: () => join("**", fooTxt), + expectedFiles: () => [barPath, dirBarPath], }, { name: "by filename with wildcard extension without prefix globs", - exclusionBuilder: () => excludedFilenameWildcard, + exclude: () => fooWildcard, + expectedFiles: () => (gitScanMode === "repo" ? [barPath, dirBarPath, dirFooPath] : [barPath, dirBarPath]), }, { name: "by filename with wildcard extension with prefix globs", - exclusionBuilder: () => join("**", excludedFilenameWildcard), + exclude: () => join("**", fooWildcard), + expectedFiles: () => [barPath, dirBarPath], }, ] for (const testParam of testParams) { it(testParam.name, async () => { + fooPath = resolve(tmpPath, "foo.txt") + barPath = resolve(tmpPath, "bar.txt") + // const notExcludedDirName = "dir" + dirPath = resolve(tmpPath, "dir") + dirFooPath = resolve(dirPath, "foo.txt") + dirBarPath = resolve(dirPath, "bar.txt") + // matches file exclusion pattern -> should be excluded - const excludedByFilename = resolve(tmpPath, "foo.txt") - await createFile(excludedByFilename) + await createFile(fooPath) // doesn't match file exclusion pattern -> should be included - const notExcludedByFilename = resolve(tmpPath, "bar.txt") - await createFile(notExcludedByFilename) - - const notExcludedDirName = "dir" - const notExcludedDirPath = resolve(tmpPath, notExcludedDirName) - await mkdir(notExcludedDirPath) + await createFile(barPath) + await mkdir(dirPath) // matches exclusion pattern filename and located in non-excluded dir -> should be excluded - const excludedByFilenameNested = resolve(notExcludedDirPath, "foo.txt") - await createFile(excludedByFilenameNested) + await createFile(dirFooPath) // doesn't match file exclusion pattern -> should be included - const notExcludedByFilenameNested = resolve(notExcludedDirPath, "bar.txt") - await createFile(notExcludedByFilenameNested) + await createFile(dirBarPath) const files = ( await handler.getFiles({ path: tmpPath, scanRoot: undefined, include: undefined, - exclude: [testParam.exclusionBuilder()], + exclude: [testParam.exclude()], log, }) ) .map((p) => p.path) .sort() - const expectedFiles = [notExcludedByFilename, notExcludedByFilenameNested].sort() - expect(files).to.eql(expectedFiles) + expect(files).to.eql(testParam.expectedFiles()) }) } }) context("should filter directories that match the exclude filter", () => { - context("should filter out all files from direct sub-directories that match the exclude filter", () => { + context("should filter out any files from direct sub-directories that match the exclude filter", () => { + let barPath: string + let dirBarPath: string + let excludedDirFooPath: string + let excludedDirBarPath: string const testParams = [ { name: "without globs", exclusionBuilder: (subDirName: string) => subDirName, + expectedFiles: () => [barPath, dirBarPath], }, { name: "with prefix globs", exclusionBuilder: (subDirName: string) => join("**", subDirName), + expectedFiles: () => + gitScanMode === "repo" + ? [barPath, dirBarPath, excludedDirBarPath, excludedDirFooPath] + : [barPath, dirBarPath], }, { name: "with full globs", exclusionBuilder: (subDirName: string) => join("**", subDirName, "**", "*"), + expectedFiles: () => [barPath, dirBarPath], }, { name: "with redundant relative path", exclusionBuilder: (subDirName: string) => `./${subDirName}`, + expectedFiles: () => [barPath, dirBarPath], }, ] @@ -523,26 +543,25 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { for (const testParam of testParams) { it(testParam.name, async () => { // doesn't match file exclusion pattern -> should be included - const notExcludedByFilename = resolve(tmpPath, "bar.txt") + barPath = resolve(tmpPath, "bar.txt") - const notExcludedDirName = "dir" - const notExcludedDirPath = resolve(tmpPath, notExcludedDirName) - await mkdir(notExcludedDirPath) + const dirPath = resolve(tmpPath, "dir") + await mkdir(dirPath) // doesn't match file exclusion pattern in non-excluded dir -> should be included - const notExcludedInNotExcludedDir = resolve(notExcludedDirPath, "bar.txt") + dirBarPath = resolve(dirPath, "bar.txt") // both match directory exclusion pattern -> should be excluded despite the file exclusion pattern matching const excludedDirName = "excluded-dir" const excludedDirPath = resolve(tmpPath, excludedDirName) await mkdir(excludedDirPath) - const excludedByDirectory1 = resolve(excludedDirPath, "foo.txt") - const excludedByDirectory2 = resolve(excludedDirPath, "bar.txt") + excludedDirFooPath = resolve(excludedDirPath, "foo.txt") + excludedDirBarPath = resolve(excludedDirPath, "bar.txt") - await createFile(notExcludedByFilename) - await createFile(notExcludedInNotExcludedDir) - await createFile(excludedByDirectory1) - await createFile(excludedByDirectory2) + await createFile(barPath) + await createFile(dirBarPath) + await createFile(excludedDirFooPath) + await createFile(excludedDirBarPath) const files = ( await handler.getFiles({ @@ -556,29 +575,43 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { .map((p) => p.path) .sort() - const expectedFiles = [notExcludedByFilename, notExcludedInNotExcludedDir].sort() - expect(files).to.eql(expectedFiles) + expect(files).to.eql(testParam.expectedFiles()) }) } }) context("should filter out all files from deep sub-directories that match the exclude filter", () => { + let barPath: string + let dirBarPath: string + let excludedSubdirFooPath: string + let excludedSubdirBarPath: string + const testParams = [ { name: "without globs", exclusionBuilder: (...subDirNames: string[]) => subDirNames.at(-1)!, + expectedFiles: () => + gitScanMode === "repo" + ? [barPath, dirBarPath, excludedSubdirBarPath, excludedSubdirFooPath] + : [barPath, dirBarPath], }, { name: "with prefix globs", exclusionBuilder: (...subDirNames: string[]) => join("**", subDirNames.at(-1)!), + expectedFiles: () => + gitScanMode === "repo" + ? [barPath, dirBarPath, excludedSubdirBarPath, excludedSubdirFooPath] + : [barPath, dirBarPath], }, { name: "with full globs", exclusionBuilder: (...subDirNames: string[]) => join("**", subDirNames.at(-1)!, "**", "*"), + expectedFiles: () => [barPath, dirBarPath], }, { name: "with redundant relative path", exclusionBuilder: (...subDirNames: string[]) => `./${subDirNames.join("/")}`, + expectedFiles: () => [barPath, dirBarPath], }, ] @@ -594,41 +627,38 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { for (const testParam of testParams) { it(testParam.name, async () => { // doesn't match file exclusion pattern -> should be included - const notExcludedByFilename = resolve(tmpPath, "bar.txt") + barPath = resolve(tmpPath, "bar.txt") - const notExcludedDirName = "dir" - const notExcludedDirPath = resolve(tmpPath, notExcludedDirName) - await mkdir(notExcludedDirPath) + const dirPath = resolve(tmpPath, "dir") + await mkdir(dirPath) // doesn't match file exclusion pattern in non-excluded dir -> should be included - const notExcludedInNotExcludedDir = resolve(notExcludedDirPath, "bar.txt") + dirBarPath = resolve(dirPath, "bar.txt") // both match directory exclusion pattern -> should be excluded despite the file exclusion pattern matching - const excludedSubDirectoryName = "excluded-subdir" - const excludedSubDirectoryPath = resolve(notExcludedDirPath, excludedSubDirectoryName) + const excludedSubDirectoryPath = resolve(dirPath, "excluded-subdir") await mkdir(excludedSubDirectoryPath) - const excludedBySubDirectory1 = resolve(excludedSubDirectoryPath, "foo.txt") - const excludedBySubDirectory2 = resolve(excludedSubDirectoryPath, "bar.txt") + excludedSubdirFooPath = resolve(excludedSubDirectoryPath, "foo.txt") + excludedSubdirBarPath = resolve(excludedSubDirectoryPath, "bar.txt") - await createFile(notExcludedByFilename) - await createFile(notExcludedInNotExcludedDir) - await createFile(excludedBySubDirectory1) - await createFile(excludedBySubDirectory2) + await createFile(barPath) + await createFile(dirBarPath) + await createFile(excludedSubdirFooPath) + await createFile(excludedSubdirBarPath) const files = ( await handler.getFiles({ path: tmpPath, scanRoot: undefined, include: undefined, // when include: [], getFiles() always returns an empty result - exclude: [testParam.exclusionBuilder(notExcludedDirName, excludedSubDirectoryName)], + exclude: [testParam.exclusionBuilder("dir", "excluded-subdir")], log, }) ) .map((p) => p.path) .sort() - const expectedFiles = [notExcludedByFilename, notExcludedInNotExcludedDir].sort() - expect(files).to.eql(expectedFiles) + expect(files).to.eql(testParam.expectedFiles()) }) } }) @@ -828,6 +858,63 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { }) }) + if (gitScanMode === "repo") { + context("action at project root that ignores a subdir containing another action", () => { + let fooPath: string + let ignoredFilePath: string + let barPath: string + let dirFooPath: string + let dirSubdirFooPath: string + let dirBarPath: string + let dirIgnoredFilePath: string + it("should respect includes/excludes for both the root-level and nested actions", async () => { + // We don't actually need action configs to test this, but we do need to use the same VCS handler instance + // for both `getFiles` calls to test the caching behavior of the `repo` scan mode. + await mkdir(resolve(tmpPath, "dir")) + fooPath = resolve(tmpPath, "foo.txt") + ignoredFilePath = resolve(tmpPath, "ignored.txt") + barPath = resolve(tmpPath, "bar.txt") + dirFooPath = resolve(tmpPath, "dir", "foo.txt") + dirSubdirFooPath = resolve(tmpPath, "dir", "subdir", "foo.txt") + dirBarPath = resolve(tmpPath, "dir", "bar.txt") + dirIgnoredFilePath = resolve(tmpPath, "dir", "ignored.txt") + + await createFile(fooPath) + await createFile(ignoredFilePath) + await createFile(barPath) + await createFile(dirFooPath) + await createFile(dirSubdirFooPath) + await createFile(dirBarPath) + await createFile(dirIgnoredFilePath) + + const rootFiles = ( + await handler.getFiles({ + path: tmpPath, + scanRoot: undefined, + exclude: ["dir", "ignored.txt"], + log, + }) + ) + .map((f) => f.path) + .sort() + + const dirFiles = ( + await handler.getFiles({ + path: resolve(tmpPath, "dir"), + scanRoot: undefined, + exclude: ["ignored.txt"], + log, + }) + ) + .map((f) => f.path) + .sort() + + expect(rootFiles).to.eql([barPath, fooPath]) + expect(dirFiles).to.eql([dirBarPath, dirFooPath, dirSubdirFooPath]) + }) + }) + } + context("large repo", () => { // TODO: should we track and anyhow validate the execution time here? it("does its thing in a reasonable amount of time", async () => { @@ -1372,7 +1459,7 @@ const commonGitHandlerTests = (gitScanMode: GitScanMode) => { } // FIXME-GITREPOHANDLER: revisit these tests and disk-based configs, -// inspect the scenarios when both include and exclude filters are defined.s +// inspect the scenarios when both include and exclude filters are defined. const getTreeVersionTests = (gitScanMode: GitScanMode) => { const gitHandlerCls = getGitHandlerCls(gitScanMode) describe("getTreeVersion", () => { diff --git a/core/test/unit/src/vcs/vcs.ts b/core/test/unit/src/vcs/vcs.ts index be080c4b44..0f19e1fd22 100644 --- a/core/test/unit/src/vcs/vcs.ts +++ b/core/test/unit/src/vcs/vcs.ts @@ -199,7 +199,7 @@ describe("VcsHandler", () => { expect(result.files).to.eql(["foo"]) }) - it("should not call getFiles is include: [] is set on the module", async () => { + it("should not call getFiles if include: [] is set on the module", async () => { td.replace(handlerA, "getFiles", async () => { throw new Error("Nope!") }) diff --git a/docs/using-garden/actions.md b/docs/using-garden/actions.md index f2d85532e2..b46f01a3aa 100644 --- a/docs/using-garden/actions.md +++ b/docs/using-garden/actions.md @@ -190,6 +190,31 @@ is to help users steer away from subtle bugs that can occur when actions uninten actions. See the next section for details on including and excluding files. {% endhint %} +### Differences in exclude behavior between the `repo` and `subtree` Git scan modes when no include is configured + +This section is only relevant for users who have set `scan.git.mode` to `subtree` in their project config. + +Garden supports two modes for scanning Git repositories (and any submodules) for files: +* `repo` (the default): Scans entire repositories and then filters down to files matching the paths, includes and +excludes for each action/module. This can be considerably more efficient than the `subtree` mode for large projects +with many actions/modules. +* `subtree` (legacy): This was Garden's scan algoithm before the `repo` scan mode was introduced. This method runs +individual `git` scans on each action/module path. + +When no includes are configured in the module/action config but there are one or more excludes in the config, +the `subtree` mode will interpret all exclude paths as globs. For example, excluding `foo.txt` would also exclude +`dir/foo.txt` (and any path that contains the string `foo.txt`). + +This is because the `subtree` scan mode uses the `--exclude` flag of `git ls-files` under the hood for perfomance +reasons. Note that this only happens when no includes are specified, and only when using the `subtree` scan mode +(which needs to be explicitly set in the project config). + +We're leaving this inconsistency with the older `subtree` mode in place to avoid breaking changes to the Garden configs +of users with older projects whose include/exclude configs rely on the exclude semantics of the `subtree` mode as is. + +The newer `repo` mode doesn't have this inconsistency, since it uses a different method to compute the file list for +an action/module. + ## Actions in the Stack Graph The key concept and the main benefit of having _actions_ instead of modules is that **anything can depend on anything**.