Skip to content

Commit

Permalink
feat(vcs): add new git repo scanning method to improve resolution speed
Browse files Browse the repository at this point in the history
This adds a new experimental git repo scanning method, enabled by setting
`scan.git.mode: repo` in the project config or by setting
`GARDEN_GIT_SCAN_MODE=repo`.

It can still do with quite a bit of optimization but the general idea is to
do far fewer (albeit generally more expensive) calls to git.

For now I don't think it makes sense to make this the default since there are
pros and cons to either approach, but for large repositories with many
actions/modules this is likely to be more efficient.

I suggest we collect user feedback and do IRL tests before diving in and
optimizing further. One obvious optimization will be to index the repository
files more efficiently, instead of just filtering on a straight list of paths.
It would also make sense to delay hashing files that aren't in the index until
they are called upon.
  • Loading branch information
edvald committed Jul 6, 2023
1 parent f3bc22c commit 6cb96a6
Show file tree
Hide file tree
Showing 32 changed files with 832 additions and 429 deletions.
4 changes: 4 additions & 0 deletions .gardenignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
.garden/
.history/
.vscode/
.gradle/
node_modules/
tmp/
core/tmp/
6 changes: 6 additions & 0 deletions .testignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.garden/
.history/
.vscode/
.gradle/
tmp/
core/tmp/
3 changes: 2 additions & 1 deletion core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@
"integ-local": "GARDEN_INTEG_TEST_MODE=local GARDEN_SKIP_TESTS=\"remote-only\" yarn _integ",
"integ-minikube": "GARDEN_INTEG_TEST_MODE=local GARDEN_SKIP_TESTS=\"remote-only\" yarn _integ",
"integ-remote": "GARDEN_INTEG_TEST_MODE=remote GARDEN_SKIP_TESTS=local-only yarn _integ",
"test": "mocha"
"test": "mocha",
"test:silly": "GARDEN_LOGGER_TYPE=basic GARDEN_LOG_LEVEL=silly mocha"
},
"pkg": {
"scripts": [
Expand Down
4 changes: 2 additions & 2 deletions core/src/commands/sync/sync-start.ts
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ export async function startSyncWithoutDeploy({
someSyncStarted = true

if (monitor) {
const monitor = new SyncMonitor({ garden, log, action: executedAction, graph, stopOnExit })
garden.monitors.addAndSubscribe(monitor, command)
const m = new SyncMonitor({ garden, log, action: executedAction, graph, stopOnExit })
garden.monitors.addAndSubscribe(m, command)
}
} catch (error) {
actionLog.warn(
Expand Down
19 changes: 17 additions & 2 deletions core/src/config/project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import { findByName, getNames } from "../util/util"
import { ConfigurationError, ParameterError, ValidationError } from "../exceptions"
import { cloneDeep, memoize } from "lodash"
import { GenericProviderConfig, providerConfigBaseSchema } from "./provider"
import { DOCS_BASE_URL, GardenApiVersion } from "../constants"
import { DOCS_BASE_URL, GardenApiVersion, GitScanMode, gitScanModes } from "../constants"
import { defaultDotIgnoreFile } from "../util/fs"
import type { CommandInfo } from "../plugin-context"
import type { VcsInfo } from "../vcs/vcs"
Expand Down Expand Up @@ -191,6 +191,10 @@ export interface ProxyConfig {
hostname: string
}

interface GitConfig {
mode: GitScanMode
}

export interface ProjectConfig {
apiVersion: GardenApiVersion
kind: "Project"
Expand All @@ -207,6 +211,7 @@ export interface ProjectConfig {
scan?: {
include?: string[]
exclude?: string[]
git?: GitConfig
}
outputs?: OutputSpec[]
providers: GenericProviderConfig[]
Expand Down Expand Up @@ -261,6 +266,16 @@ const projectScanSchema = createSchema({
`
)
.example(["public/**/*", "tmp/**/*"]),
git: joi.object().keys({
mode: joi
.string()
.allow(...gitScanModes)
.only()
.default("subtree")
.description(
"Choose how to perform scans of git repositories. The default (`subtree`) runs individual git scans on each action/module path. The `repo` mode scans entire repositories and then filters down to files matching the paths, includes and excludes for each action/module. This can be considerably more efficient for large projects with many actions/modules."
),
}),
}),
})

Expand Down Expand Up @@ -376,7 +391,7 @@ export const projectSchema = createSchema({
)
.example(["127.0.0.1"]),
}),
scan: projectScanSchema().description("Control where to scan for configuration files in the project."),
scan: projectScanSchema().description("Control where and how to scan for configuration files in the project."),
outputs: joiSparseArray(projectOutputSchema())
.unique("name")
.description(
Expand Down
5 changes: 5 additions & 0 deletions core/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ import { homedir } from "os"

export const isPkg = !!(<any>process).pkg

export const gitScanModes = ["repo", "subtree"] as const
export type GitScanMode = (typeof gitScanModes)[number]
export const defaultGitScanMode: GitScanMode = "subtree"

export const GARDEN_CORE_ROOT = isPkg ? resolve(process.execPath, "..") : resolve(__dirname, "..", "..")
export const GARDEN_CLI_ROOT = isPkg ? resolve(process.execPath, "..") : resolve(GARDEN_CORE_ROOT, "..", "cli")
export const STATIC_DIR = isPkg ? resolve(process.execPath, "..", "static") : resolve(GARDEN_CORE_ROOT, "..", "static")
Expand Down Expand Up @@ -62,6 +66,7 @@ export const gardenEnv = {
GARDEN_ENVIRONMENT: env.get("GARDEN_ENVIRONMENT").required(false).asString(),
GARDEN_EXPERIMENTAL_BUILD_STAGE: env.get("GARDEN_EXPERIMENTAL_BUILD_STAGE").required(false).asBool(),
GARDEN_GE_SCHEDULED: env.get("GARDEN_GE_SCHEDULED").required(false).asBool(),
GARDEN_GIT_SCAN_MODE: env.get("GARDEN_GIT_SCAN_MODE").required(false).asEnum(gitScanModes),
GARDEN_LEGACY_BUILD_STAGE: env.get("GARDEN_LEGACY_BUILD_STAGE").required(false).asBool(),
GARDEN_LOG_LEVEL: env.get("GARDEN_LOG_LEVEL").required(false).asString(),
GARDEN_LOGGER_TYPE: env.get("GARDEN_LOGGER_TYPE").required(false).asString(),
Expand Down
53 changes: 29 additions & 24 deletions core/src/garden.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ import { getGardenInstanceKey } from "./server/helpers"
import { SuggestedCommand } from "./commands/base"
import { OtelTraced } from "./util/tracing/decorators"
import { wrapActiveSpan } from "./util/tracing/spans"
import { GitRepoHandler } from "./vcs/git-repo"

const defaultLocalAddress = "localhost"

Expand Down Expand Up @@ -329,7 +330,10 @@ export class Garden {

this.asyncLock = new AsyncLock()

this.vcs = new GitHandler({
const gitMode = gardenEnv.GARDEN_GIT_SCAN_MODE || params.projectConfig.scan?.git?.mode
const handlerCls = gitMode === "repo" ? GitRepoHandler : GitHandler

this.vcs = new handlerCls({
garden: this,
projectRoot: params.projectRoot,
gardenDirPath: params.gardenDirPath,
Expand Down Expand Up @@ -1051,7 +1055,7 @@ export class Garden {

let updated = false

// Resolve modules from specs and add to the list
// Resolve actions from augmentGraph specs and add to the list
await Bluebird.map(addActions || [], async (config) => {
// There is no actual config file for plugin modules (which the prepare function assumes)
delete config.internal?.configFilePath
Expand All @@ -1071,6 +1075,7 @@ export class Garden {
configsByKey: actionConfigs,
mode: actionModes[key] || "default",
linkedSources,
scanRoot: config.internal.basePath,
})

graph.addAction(action)
Expand Down Expand Up @@ -1139,45 +1144,40 @@ export class Garden {
@OtelTraced({
name: "resolveAction",
})
async resolveAction<T extends Action>({ action, graph, log }: { action: T; log: Log; graph?: ConfigGraph }) {
if (!graph) {
graph = await this.getConfigGraph({ log, emit: false })
}

async resolveAction<T extends Action>({ action, graph, log }: { action: T; log: Log; graph: ConfigGraph }) {
return resolveAction({ garden: this, action, graph, log })
}

@OtelTraced({
name: "resolveActions",
})
async resolveActions<T extends Action>({ actions, graph, log }: { actions: T[]; log: Log; graph?: ConfigGraph }) {
if (!graph) {
graph = await this.getConfigGraph({ log, emit: false })
}

async resolveActions<T extends Action>({ actions, graph, log }: { actions: T[]; log: Log; graph: ConfigGraph }) {
return resolveActions({ garden: this, actions, graph, log })
}

@OtelTraced({
name: "executeAction",
})
async executeAction<T extends Action>({ action, graph, log }: { action: T; log: Log; graph?: ConfigGraph }) {
if (!graph) {
graph = await this.getConfigGraph({ log, emit: false })
}

async executeAction<T extends Action>({ action, graph, log }: { action: T; log: Log; graph: ConfigGraph }) {
return executeAction({ garden: this, action, graph, log })
}

/**
* Resolves the module version (i.e. build version) for the given module configuration and its build dependencies.
*/
async resolveModuleVersion(
log: Log,
moduleConfig: ModuleConfig,
moduleDependencies: GardenModule[],
force = false
): Promise<ModuleVersion> {
async resolveModuleVersion({
log,
moduleConfig,
moduleDependencies,
force = false,
scanRoot,
}: {
log: Log
moduleConfig: ModuleConfig
moduleDependencies: GardenModule[]
force?: boolean
scanRoot?: string
}): Promise<ModuleVersion> {
const moduleName = moduleConfig.name
const depModuleNames = moduleDependencies.map((m) => m.name)
depModuleNames.sort()
Expand All @@ -1195,7 +1195,12 @@ export class Garden {

const cacheContexts = [...moduleDependencies, moduleConfig].map((c: ModuleConfig) => getModuleCacheContext(c))

const treeVersion = await this.vcs.getTreeVersion({ log, projectName: this.projectName, config: moduleConfig })
const treeVersion = await this.vcs.getTreeVersion({
log,
projectName: this.projectName,
config: moduleConfig,
scanRoot,
})

validateSchema(treeVersion, treeVersionSchema(), {
context: `${this.vcs.name} tree version for module at ${moduleConfig.path}`,
Expand Down
23 changes: 21 additions & 2 deletions core/src/graph/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import { LinkedSource, LinkedSourceMap } from "../config-store/local"
import { relative } from "path"
import { profileAsync } from "../util/profiling"
import { uuidv4 } from "../util/random"
import { getConfigBasePath } from "../vcs/vcs"

export const actionConfigsToGraph = profileAsync(async function actionConfigsToGraph({
garden,
Expand Down Expand Up @@ -112,6 +113,10 @@ export const actionConfigsToGraph = profileAsync(async function actionConfigsToG
}
}

// Optimize file scanning by avoiding unnecessarily broad scans when project is not in repo root.
const allPaths = Object.values(configsByKey).map((c) => getConfigBasePath(c))
const minimalRoots = await garden.vcs.getMinimalRoots(log, allPaths)

const router = await garden.getActionRouter()

// TODO: Maybe we could optimize resolving tree versions, avoid parallel scanning of the same directory etc.
Expand Down Expand Up @@ -151,7 +156,17 @@ export const actionConfigsToGraph = profileAsync(async function actionConfigsToG
}

try {
const action = await actionFromConfig({ garden, graph, config, router, log, configsByKey, mode, linkedSources })
const action = await actionFromConfig({
garden,
graph,
config,
router,
log,
configsByKey,
mode,
linkedSources,
scanRoot: minimalRoots[getConfigBasePath(config)],
})

if (!action.supportsMode(mode)) {
if (explicitMode) {
Expand Down Expand Up @@ -186,6 +201,7 @@ export const actionFromConfig = profileAsync(async function actionFromConfig({
configsByKey,
mode,
linkedSources,
scanRoot,
}: {
garden: Garden
graph: ConfigGraph
Expand All @@ -195,6 +211,7 @@ export const actionFromConfig = profileAsync(async function actionFromConfig({
configsByKey: ActionConfigsByKey
mode: ActionMode
linkedSources: LinkedSourceMap
scanRoot?: string
}) {
// Call configure handler and validate
const { config, supportedModes, templateContext } = await preprocessActionConfig({
Expand Down Expand Up @@ -264,8 +281,10 @@ export const actionFromConfig = profileAsync(async function actionFromConfig({
}

const dependencies = dependenciesFromActionConfig(log, config, configsByKey, definition, templateContext)

const treeVersion =
config.internal.treeVersion || (await garden.vcs.getTreeVersion({ log, projectName: garden.projectName, config }))
config.internal.treeVersion ||
(await garden.vcs.getTreeVersion({ log, projectName: garden.projectName, config, scanRoot }))

const variables = await mergeVariables({
basePath: config.internal.basePath,
Expand Down
2 changes: 1 addition & 1 deletion core/src/process.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,5 +112,5 @@ export function isRunning(pid: number) {
// Note: Circumvents an issue where the process exits before the output is fully flushed.
// Needed for output renderers and Winston (see: https://github.com/winstonjs/winston/issues/228)
export async function waitForOutputFlush() {
await sleep(100)
return sleep(50)
}
32 changes: 27 additions & 5 deletions core/src/resolve-module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import type { ProviderMap } from "./config/provider"
import chalk from "chalk"
import { DependencyGraph } from "./graph/common"
import Bluebird from "bluebird"
import { readFile, mkdirp, writeFile } from "fs-extra"
import { readFile, mkdirp } from "fs-extra"
import type { Log } from "./logger/log-entry"
import { ModuleConfigContext, ModuleConfigContextParams } from "./config/template-contexts/module"
import { pathToCacheContext } from "./cache"
Expand Down Expand Up @@ -100,14 +100,17 @@ export class ModuleResolver {
// remove nodes from as we complete the processing.
const fullGraph = new DependencyGraph()
const processingGraph = new DependencyGraph()
const allPaths: string[] = []

for (const key of Object.keys(this.rawConfigsByKey)) {
for (const graph of [fullGraph, processingGraph]) {
graph.addNode(key)
}
}

for (const [key, rawConfig] of Object.entries(this.rawConfigsByKey)) {
const buildPath = this.garden.buildStaging.getBuildPath(rawConfig)
allPaths.push(rawConfig.path)
const deps = this.getModuleDependenciesFromConfig(rawConfig, buildPath)
for (const graph of [fullGraph, processingGraph]) {
for (const dep of deps) {
Expand All @@ -118,6 +121,8 @@ export class ModuleResolver {
}
}

const minimalRoots = await this.garden.vcs.getMinimalRoots(this.log, allPaths)

const resolvedConfigs: ModuleConfigMap = {}
const resolvedModules: ModuleMap = {}
const errors: { [moduleName: string]: Error } = {}
Expand Down Expand Up @@ -175,7 +180,12 @@ export class ModuleResolver {
// dependencies.
if (!foundNewDependency) {
const buildPath = this.garden.buildStaging.getBuildPath(resolvedConfig)
resolvedModules[moduleKey] = await this.resolveModule(resolvedConfig, buildPath, resolvedDependencies)
resolvedModules[moduleKey] = await this.resolveModule({
resolvedConfig,
buildPath,
dependencies: resolvedDependencies,
repoRoot: minimalRoots[resolvedConfig.path],
})
this.log.silly(`ModuleResolver: Module ${moduleKey} resolved`)
processingGraph.removeNode(moduleKey)
}
Expand Down Expand Up @@ -478,7 +488,17 @@ export class ModuleResolver {
return this.bases[type]
}

private async resolveModule(resolvedConfig: ModuleConfig, buildPath: string, dependencies: GardenModule[]) {
private async resolveModule({
resolvedConfig,
buildPath,
dependencies,
repoRoot,
}: {
resolvedConfig: ModuleConfig
buildPath: string
dependencies: GardenModule[]
repoRoot: string
}) {
this.log.silly(`Resolving module ${resolvedConfig.name}`)

// Write module files
Expand Down Expand Up @@ -542,7 +562,8 @@ export class ModuleResolver {

try {
await mkdirp(targetDir)
await writeFile(targetPath, resolvedContents)
// Use VcsHandler.writeFile() to make sure version is re-computed after writing new/updated files
await this.garden.vcs.writeFile(this.log, targetPath, resolvedContents)
} catch (error) {
throw new FilesystemError({
message: `Unable to write templated file ${fileSpec.targetPath} from ${resolvedConfig.name}: ${error.message}`,
Expand All @@ -565,6 +586,7 @@ export class ModuleResolver {
log: this.log,
config: resolvedConfig,
buildDependencies: dependencies,
scanRoot: repoRoot,
})

const moduleTypeDefinitions = await this.garden.getModuleTypes()
Expand Down Expand Up @@ -745,7 +767,7 @@ export const convertModules = profileAsync(async function convertModules(

const totalReturned = (result.actions?.length || 0) + (result.group?.actions.length || 0)

log.debug(`Module ${module.name} converted to ${totalReturned} actions`)
log.debug(`Module ${module.name} converted to ${totalReturned} action(s)`)

if (result.group) {
for (const action of result.group.actions) {
Expand Down
Loading

0 comments on commit 6cb96a6

Please sign in to comment.