Skip to content

Commit

Permalink
improvement(core): faster file scanning on modules with includes set
Browse files Browse the repository at this point in the history
We now push the filtering to the underlying git command instead of
post-filtering, which may have a sizable impact in certain scenarios.
  • Loading branch information
edvald authored and thsig committed Mar 2, 2021
1 parent 8bdb702 commit b841837
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 39 deletions.
19 changes: 17 additions & 2 deletions core/src/util/fs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,29 @@ export async function findConfigPathsInPath({
exclude?: string[]
log: LogEntry
}): Promise<string[]> {
if (include) {
include = include.map((path) => {
const split = path.split(posix.sep)
const last = split[split.length - 1]

if (last === "**" || last === "*") {
// Swap out a general wildcard on the last path segment with one that will specifically match Garden configs,
// to optimize the scan.
return split.slice(0, -1).concat(["*garden.y*ml"]).join(posix.sep)
} else {
return path
}
})
} else {
include = ["**/*garden.y*ml"]
}

const paths = await vcs.getFiles({
path: dir,
pathDescription: "project root",
include,
exclude: exclude || [],
log,
// We specify both a pattern that is passed to `git`, and then double-check with a filter function
pattern: "*garden.y*ml",
filter: (f) => isConfigFilename(basename(f)),
})

Expand Down
65 changes: 37 additions & 28 deletions core/src/vcs/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,12 @@ export class GitHandler extends VcsHandler {
* Returns a list of files, along with file hashes, under the given path, taking into account the configured
* .ignore files, and the specified include/exclude filters.
*/
async getFiles({
log,
path,
pathDescription,
include,
exclude,
filter,
pattern,
}: GetFilesParams): Promise<VcsFile[]> {
async getFiles({ log, path, pathDescription, include, exclude, filter }: GetFilesParams): Promise<VcsFile[]> {
if (include && include.length === 0) {
// No need to proceed, nothing should be included
return []
}

const git = this.gitCli(log, path)
const gitRoot = await this.getRepoRoot(log, path)

Expand All @@ -126,13 +123,22 @@ export class GitHandler extends VcsHandler {
.map((modifiedRelPath) => resolve(gitRoot, modifiedRelPath))
)

// Apply the include patterns to the ls-files queries. We use the 'glob' "magic word" (in git parlance)
// to make sure the path handling is consistent with normal POSIX-style globs used generally by Garden.
// Note: We unfortunately can't exclude at this level because it simply doesn't work in git, for reasons unknown.
const patterns = [...(include || []).map((p) => ":(glob)" + p)]
const lsFilesCommonArgs = ["--exclude", this.gardenDirPath]

// List tracked but ignored files (we currently exclude those as well, so we need to query that specially)
// TODO: change in 0.13 to no longer exclude these
const trackedButIgnored = new Set(
this.ignoreFiles.length === 0
? []
: flatten(
await Promise.all(
this.ignoreFiles.map((f) => git("ls-files", "--ignored", "--exclude-per-directory", f, pattern))
this.ignoreFiles.map((f) =>
git("ls-files", "--ignored", ...lsFilesCommonArgs, "--exclude-per-directory", f)
)
)
)
)
Expand Down Expand Up @@ -185,19 +191,19 @@ export class GitHandler extends VcsHandler {

const resolvedPath = resolve(path, filePath)

// We push to the output array if it passes through the include/exclude filters.
if (matchPath(filePath, include, exclude) || submodulePaths.includes(resolvedPath)) {
// We push to the output array if it passes through the exclude filters.
if (matchPath(filePath, undefined, exclude) && !submodulePaths.includes(resolvedPath)) {
files.push({ path: resolvedPath, hash })
}
}

const lsFiles = (ignoreFile?: string) => {
const args = ["ls-files", "-s", "--others", "--exclude", this.gardenDirPath]
const args = ["ls-files", "-s", "--others", ...lsFilesCommonArgs]

if (ignoreFile) {
args.push("--exclude-per-directory", ignoreFile)
}
args.push(pattern ? join(path, pattern) : path)
args.push(...patterns)

return execa("git", args, { cwd: path, buffer: false })
}
Expand Down Expand Up @@ -272,22 +278,25 @@ export class GitHandler extends VcsHandler {
}

// Resolve submodules
const withSubmodules = flatten(
await Bluebird.map(files, async (f) => {
if (submodulePaths.includes(f.path)) {
// This path is a submodule, so we recursively call getFiles for that path again.
// Note: We apply include/exclude filters after listing files from submodule
return (
await this.getFiles({ log, path: f.path, pathDescription: "submodule", exclude: [] })
).filter((submoduleFile) => matchPath(relative(path, submoduleFile.path), include, exclude))
} else {
return [f]
}
})
)
// TODO: see about optimizing this further, avoiding scans when we're sure they'll not match includes/excludes etc.
await Bluebird.map(submodulePaths, async (submodulePath) => {
if (submodulePath.startsWith(path)) {
// Note: We apply include/exclude filters after listing files from submodule
const submoduleRelPath = relative(path, submodulePath)
files.push(
...(await this.getFiles({
log,
path: submodulePath,
pathDescription: "submodule",
exclude: [],
filter: (p) => matchPath(join(submoduleRelPath, p), include, exclude),
}))
)
}
})

// Make sure we have a fresh hash for each file
return Bluebird.map(withSubmodules, async (f) => {
return Bluebird.map(files, async (f) => {
const resolvedPath = resolve(path, f.path)
let output = { path: resolvedPath, hash: f.hash || "" }
let stats: Stats
Expand Down
1 change: 0 additions & 1 deletion core/src/vcs/vcs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ export interface GetFilesParams {
include?: string[]
exclude?: string[]
filter?: (path: string) => boolean
pattern?: string
}

export interface RemoteSourceParams {
Expand Down
23 changes: 15 additions & 8 deletions core/test/unit/src/vcs/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,20 @@ describe("GitHandler", () => {
expect(await handler.getFiles({ path: tmpPath, log })).to.eql([{ path: resolve(tmpPath, "my file.txt"), hash }])
})

it("should filter out files that don't match the include filter, if specified", async () => {
it("should return nothing if include: []", async () => {
const path = resolve(tmpPath, "foo.txt")
await createFile(path)

expect(await handler.getFiles({ path: tmpPath, include: [], log })).to.eql([])
})

it("should filter out files that don't match the include filter, if specified", async () => {
const path = resolve(tmpPath, "foo.txt")
await createFile(path)

expect(await handler.getFiles({ path: tmpPath, include: ["bar.*"], log })).to.eql([])
})

it("should include files that match the include filter, if specified", async () => {
const path = resolve(tmpPath, "foo.txt")
const hash = "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
Expand All @@ -271,11 +278,11 @@ describe("GitHandler", () => {
})

it("should respect include and exclude patterns, if both are specified", async () => {
const dir = resolve(tmpPath, "module-a")
const pathA = resolve(dir, "yes.txt")
const moduleDir = resolve(tmpPath, "module-a")
const pathA = resolve(moduleDir, "yes.txt")
const pathB = resolve(tmpPath, "no.txt")
const pathC = resolve(dir, "yes.pass")
await mkdir(dir)
const pathC = resolve(moduleDir, "yes.pass")
await mkdir(moduleDir)
await createFile(pathA)
await createFile(pathB)
await createFile(pathC)
Expand Down Expand Up @@ -425,7 +432,7 @@ describe("GitHandler", () => {
submodulePath = await realpath(submodule.path)
initFile = await commit("init", submodulePath)

await execa("git", ["submodule", "add", submodulePath, "sub"], { cwd: tmpPath })
await execa("git", ["submodule", "add", submodulePath, "sub", "--force"], { cwd: tmpPath })
await execa("git", ["commit", "-m", "add submodule"], { cwd: tmpPath })
})

Expand Down Expand Up @@ -454,10 +461,10 @@ describe("GitHandler", () => {
const path = join(tmpPath, "sub", "x.foo")
await createFile(path)

const files = await handler.getFiles({ path: tmpPath, log, include: ["sub/*.txt"] })
const files = await handler.getFiles({ path: tmpPath, log, include: ["**/*.txt"] })
const paths = files.map((f) => relative(tmpPath, f.path)).sort()

expect(paths).to.eql([join("sub", initFile)])
expect(paths).to.include(join("sub", initFile))
})

it("should respect exclude filter when scanning a submodule", async () => {
Expand Down

0 comments on commit b841837

Please sign in to comment.