Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(git): fix repo scan result caching #6179

Merged
merged 6 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions core/src/vcs/git-repo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import { FileTree } from "./file-tree.js"
import { normalize, sep } from "path"
import { stableStringify } from "../util/string.js"
import { hashString } from "../util/util.js"
import { Profile } from "../util/profiling.js"

const { pathExists } = fsExtra

type ScanRepoParams = Pick<GetFilesParams, "log" | "path" | "pathDescription" | "failOnPrompt" | "exclude">
type ScanRepoParams = Pick<GetFilesParams, "log" | "path" | "pathDescription" | "failOnPrompt">

interface GitRepoGetFilesParams extends GetFilesParams {
scanFromProjectRoot: boolean
Expand All @@ -44,7 +45,7 @@ const getIncludeExcludeFiles: IncludeExcludeFilesHandler<GitRepoGetFilesParams,
// Make sure action config is not mutated.
let exclude = !params.exclude ? [] : [...params.exclude]

// Do the same normalization of the excluded paths like in `GitHandler`.
// Normalize the excluded paths the same way as in "subtree" scanning mode.
// This might be redundant because the non-normalized paths will be handled by `augmentGlobs` below,
// but it does no harm and makes the implementation clearer.
exclude = exclude.map(normalize)
Expand All @@ -60,7 +61,25 @@ const getIncludeExcludeFiles: IncludeExcludeFilesHandler<GitRepoGetFilesParams,
return { include, exclude, augmentedIncludes, augmentedExcludes }
}

// @Profile()
/**
 * Computes a hash of the parameters that filter a repository file list, for use as part of a cache key.
 *
 * The hash is built from:
 * - `filter`: the optional path predicate. Only its source text (`filter.toString()`) is hashed.
 * - `augmentedIncludes` / `augmentedExcludes`: the augmented include and exclude lists.
 *
 * The include/exclude lists are sorted before hashing, so their order does not affect the result.
 * Sorting is done on copies, so the caller's arrays are never reordered as a side effect.
 *
 * NOTE: because only the source code of `filter` is hashed, variables captured by the function do not
 * affect the hash. Two filters with identical source but different captured values produce the same hash.
 * NOTE(review): it was suggested in code review to make this explicit by accepting
 * `filterFnSource: string | undefined` instead of the function itself.
 *
 * @returns the result of `hashString` applied to the stable-stringified parameters
 */
export function getHashedFilterParams({
  filter,
  augmentedIncludes,
  augmentedExcludes,
}: {
  filter: ((path: string) => boolean) | undefined
  augmentedIncludes: string[]
  augmentedExcludes: string[]
}) {
  return hashString(
    stableStringify({
      filter: filter ? filter.toString() : undefined, // We hash the source code of the filter function if provided.
      // `Array.prototype.sort` sorts in place, so sort copies to keep the inputs untouched.
      augmentedIncludes: [...augmentedIncludes].sort(),
      augmentedExcludes: [...augmentedExcludes].sort(),
    })
  )
}

@Profile()
export class GitRepoHandler extends AbstractGitHandler {
private readonly gitHandlerDelegate: GitSubTreeHandler
override readonly name = "git-repo"
Expand All @@ -71,7 +90,7 @@ export class GitRepoHandler extends AbstractGitHandler {
}

/**
* This has the same signature as the GitHandler super class method but instead of scanning the individual directory
* This has the same signature as the `GitSubTreeHandler` class method but instead of scanning the individual directory
* path directly, we scan the entire enclosing git repository, cache that file list and then filter down to the
* sub-path. This results in far fewer git process calls but in turn collects more data in memory.
*/
Expand All @@ -97,13 +116,11 @@ export class GitRepoHandler extends AbstractGitHandler {
const scanFromProjectRoot = scanRoot === this.projectRoot
const { augmentedExcludes, augmentedIncludes } = await getIncludeExcludeFiles({ ...params, scanFromProjectRoot })

const hashedFilterParams = hashString(
stableStringify({
filter: filter ? filter.toString() : undefined, // We hash the source code of the filter function if provided.
augmentedIncludes,
augmentedExcludes,
})
)
const hashedFilterParams = getHashedFilterParams({
filter,
augmentedIncludes,
augmentedExcludes,
})
const filteredFilesCacheKey = ["git-repo-files", path, hashedFilterParams]

const cached = this.cache.get(log, filteredFilesCacheKey) as VcsFile[] | undefined
Expand Down
2 changes: 1 addition & 1 deletion core/src/vcs/vcs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ export function describeConfig(config: ModuleConfig | BaseActionConfig): ActionD
* Checks if the {@code subPathCandidate} is a sub-path of {@code basePath}.
* Sub-path means that a candidate must be located inside a reference path.
*
* Both {@basePath} and {@ subPathCandidate} must be absolute paths
* Both {@code basePath} and {@code subPathCandidate} must be absolute paths
*
* @param basePath the reference path (absolute)
* @param subPathCandidate the path to be checked (absolute)
Expand Down
41 changes: 41 additions & 0 deletions core/test/unit/src/vcs/vcs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import { defaultDotIgnoreFile, fixedProjectExcludes } from "../../../../src/util
import { createActionLog } from "../../../../src/logger/log-entry.js"
import type { BaseActionConfig } from "../../../../src/actions/types.js"
import { TreeCache } from "../../../../src/cache.js"
import { getHashedFilterParams } from "../../../../src/vcs/git-repo.js"

export class TestVcsHandler extends VcsHandler {
override readonly name = "test"
Expand Down Expand Up @@ -664,4 +665,44 @@ describe("helpers", () => {
expect(subPath).to.be.true
})
})

describe("getHashedFilterParams", () => {
  // Hashes the given include/exclude lists with no filter function set.
  const hashWithoutFilter = (augmentedIncludes: string[], augmentedExcludes: string[]) =>
    getHashedFilterParams({ filter: undefined, augmentedIncludes, augmentedExcludes })

  it("should return the same hashes for fully equal objects", () => {
    // Two separately constructed but structurally identical inputs.
    const first = hashWithoutFilter(["yes.txt"], ["no.txt"])
    const second = hashWithoutFilter(["yes.txt"], ["no.txt"])

    expect(first).to.eql(second)
  })

  it("should return the different hashes for non-equal objects", () => {
    // Both the include and the exclude lists differ between the two inputs.
    const first = hashWithoutFilter(["yes1.txt"], ["no1.txt"])
    const second = hashWithoutFilter(["yes2.txt"], ["no2.txt"])

    expect(first).not.to.eql(second)
  })

  it("should not depend on the order of the include/exclude file lists", () => {
    // Same entries in both inputs, listed in opposite order.
    const ascending = hashWithoutFilter(["yes1.txt", "yes2.txt"], ["no1.txt", "no2.txt"])
    const descending = hashWithoutFilter(["yes2.txt", "yes1.txt"], ["no2.txt", "no1.txt"])

    expect(ascending).to.eql(descending)
  })
})
})