Skip to content

Commit

Permalink
wip: use --mirror
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz authored and Richard Gomez committed Jun 17, 2024
1 parent 0041ada commit 8f9c202
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 30 deletions.
14 changes: 7 additions & 7 deletions hack/snifftest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,16 +197,16 @@ func main() {
SkipBinaries: true,
SkipArchives: false,
Concurrency: runtime.NumCPU(),
SourceMetadataFunc: func(repository, commit, ref, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
SourceMetadataFunc: func(repository, commit, commitSource, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
return &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Git{
Git: &source_metadatapb.Git{
Repository: repository,
Commit: commit,
CommitRef: ref,
Email: email,
Timestamp: timestamp,
File: file,
Repository: repository,
Commit: commit,
CommitSource: commitSource,
Email: email,
Timestamp: timestamp,
File: file,
},
},
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/engine/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ func (e *Engine) ScanGitHub(ctx context.Context, c sources.GithubConfig) error {
opts := []git.ScanOption{
git.ScanOptionFilter(c.Filter),
git.ScanOptionLogOptions(logOptions),
// Repositories are cloned with `--mirror`, which implies `--bare`.
git.ScanOptionBare(true),
}
scanOptions := git.NewScanOptions(opts...)

Expand Down
2 changes: 2 additions & 0 deletions pkg/engine/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) error {
opts := []git.ScanOption{
git.ScanOptionFilter(c.Filter),
git.ScanOptionLogOptions(logOptions),
// Repositories are cloned with `--mirror`, which implies `--bare`.
git.ScanOptionBare(true),
}
scanOptions := git.NewScanOptions(opts...)

Expand Down
17 changes: 8 additions & 9 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ func NewParser(options ...Option) *Parser {
func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool, excludedGlobs []string, isBare bool) (chan *Diff, error) {
args := []string{
"-C", source,
"--no-replace-objects",
"log",
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
"--full-history",
Expand Down Expand Up @@ -636,28 +637,26 @@ func parseCommitLine(line []byte) (hash []byte, ref []byte) {
// parseSourceRef turns the ref name reported by `git log --source` into a
// short, human-readable description of where a commit came from.
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
//
// It returns:
//   - "" for an empty ref and for ordinary branch/tag refs
//     (`refs/heads/...`, `refs/tags/...`);
//   - "Pull request #N" for GitHub pull request refs (`refs/pull/N/...`);
//   - "Merge request #N" for GitLab merge request refs
//     (`refs/merge-requests/N/...`);
//   - "<ref> (hidden ref)" for any other ref.
func parseSourceRef(ref []byte) string {
	// Nothing to describe when no source ref was reported.
	if len(ref) == 0 {
		return ""
	}

	// We don't care about 'normal' refs.
	if bytes.HasPrefix(ref, []byte("refs/heads/")) || bytes.HasPrefix(ref, []byte("refs/tags/")) {
		return ""
	}

	// Handle GitHub pull requests.
	// e.g., `refs/pull/238/head` or `refs/pull/1234/merge`
	if after, ok := bytes.CutPrefix(ref, []byte("refs/pull/")); ok {
		// bytes.Cut yields the whole remainder when there is no "/", so a
		// ref without a trailing segment cannot cause an out-of-range slice.
		prNumber, _, _ := bytes.Cut(after, []byte("/"))
		return "Pull request #" + string(prNumber)
	}

	// Handle GitLab merge requests.
	// e.g., `refs/merge-requests/238/head` or `refs/merge-requests/1234/merge`
	if after, ok := bytes.CutPrefix(ref, []byte("refs/merge-requests/")); ok {
		mrNumber, _, _ := bytes.Cut(after, []byte("/"))
		return "Merge request #" + string(mrNumber)
	}

	// Anything else is a ref that a regular (non-mirror) clone would not fetch.
	return string(ref) + " (hidden ref)"
}

// Author: Bill Rich <[email protected]>
Expand Down
44 changes: 35 additions & 9 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,11 @@ func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, so
func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, _ ...sources.ChunkingTarget) error {
reporter := sources.ChanReporter{Ch: chunksChan}
if err := s.scanRepos(ctx, reporter); err != nil {
ctx.Logger().Error(err, "Chunks scanRepos")
return err
}
if err := s.scanDirs(ctx, reporter); err != nil {
ctx.Logger().Error(err, "Chunks scanDirs")
return err
}

Expand All @@ -251,6 +253,7 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, _ .

// scanRepos scans the configured repositories in s.conn.Repositories.
func (s *Source) scanRepos(ctx context.Context, reporter sources.ChunkReporter) error {
ctx.Logger().Info("scanRepos", "repos", s.conn.Repositories)
if len(s.conn.Repositories) == 0 {
return nil
}
Expand All @@ -270,6 +273,7 @@ func (s *Source) scanRepos(ctx context.Context, reporter sources.ChunkReporter)

// scanRepo scans a single provided repository.
func (s *Source) scanRepo(ctx context.Context, repoURI string, reporter sources.ChunkReporter) error {
ctx.Logger().Info("scanRepo", "uri", repoURI)
var cloneFunc func() (string, *git.Repository, error)
switch cred := s.conn.GetCredential().(type) {
case *sourcespb.Git_BasicAuth:
Expand Down Expand Up @@ -323,13 +327,16 @@ func (s *Source) scanDirs(ctx context.Context, reporter sources.ChunkReporter) e

// scanDir scans a single provided directory.
func (s *Source) scanDir(ctx context.Context, gitDir string, reporter sources.ChunkReporter) error {
ctx.Logger().Info("scanDir", "dir", gitDir)
if !s.scanOptions.Bare && strings.HasSuffix(gitDir, "git") {
ctx.Logger().Info("skipping bare repo", "dir", gitDir)
// TODO: Figure out why we skip directories ending in "git".
return nil
}
// try paths instead of url
repo, err := RepoFromPath(gitDir, s.scanOptions.Bare)
if err != nil {
ctx.Logger().Error(err, "error getting RepoFromPath", "dir", gitDir)
return reporter.ChunkErr(ctx, err)
}

Expand Down Expand Up @@ -417,9 +424,13 @@ func executeClone(ctx context.Context, params cloneParams) (*git.Repository, err
}

gitArgs := []string{
"clone", cloneURL.String(),
params.clonePath,
"--quiet", // https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--quietcode
"clone", cloneURL.String(), params.clonePath,
// Don't output non-vital information.
// https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--quietcode
"--quiet",
// Fetch all refs from the remote.
// https://github.com/trufflesecurity/trufflehog/issues/1588
"--mirror",
}
gitArgs = append(gitArgs, params.args...)
cloneCmd := exec.Command("git", gitArgs...)
Expand Down Expand Up @@ -456,10 +467,9 @@ func executeClone(ctx context.Context, params cloneParams) (*git.Repository, err
return nil, fmt.Errorf("could not clone repo: %s, %w", safeURL, err)
}

options := &git.PlainOpenOptions{DetectDotGit: true, EnableDotGitCommonDir: true}
repo, err := git.PlainOpenWithOptions(params.clonePath, options)
repo, err := git.PlainOpen(params.clonePath)
if err != nil {
return nil, fmt.Errorf("could not open cloned repo: %w", err)
return nil, fmt.Errorf("could not open cloned repo %s: %w", safeURL, err)
}
logger.V(1).Info("successfully cloned repo")

Expand Down Expand Up @@ -537,6 +547,13 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
repoCtx = ctx
}

var gitDir string
if scanOptions.Bare {
gitDir = path
} else {
gitDir = filepath.Join(path, gitDirName)
}

logger := repoCtx.Logger()
var logValues []any
if scanOptions.BaseHash != "" {
Expand All @@ -557,8 +574,6 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
return nil
}

gitDir := filepath.Join(path, gitDirName)

logger.Info("scanning repo", logValues...)

var depth int64
Expand Down Expand Up @@ -787,7 +802,13 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
}

reachedBase := false
gitDir := filepath.Join(path, gitDirName)

var gitDir string
if scanOptions.Bare {
gitDir = path
} else {
gitDir = filepath.Join(path, gitDirName)
}

logger := ctx.Logger()
var logValues []any
Expand Down Expand Up @@ -908,6 +929,7 @@ func (s *Git) ScanRepo(ctx context.Context, repo *git.Repository, repoPath strin
if scanOptions == nil {
scanOptions = NewScanOptions()
}
ctx.Logger().Info("ScanRepo")
if err := normalizeConfig(scanOptions, repo); err != nil {
return err
}
Expand Down Expand Up @@ -1063,6 +1085,7 @@ func TryAdditionalBaseRefs(repo *git.Repository, base string) (*plumbing.Hash, e

// prepareRepoSinceCommit clones a repo starting at the given commitHash and returns the cloned repo path.
func prepareRepoSinceCommit(ctx context.Context, uriString, commitHash string) (string, bool, error) {
ctx.Logger().Info("prepareRepoSinceCommit", "commit", commitHash)
if commitHash == "" {
return PrepareRepo(ctx, uriString)
}
Expand Down Expand Up @@ -1248,15 +1271,18 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.

func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error {
for _, repo := range s.conn.GetDirectories() {
ctx.Logger().Info("enumerating dirs", "repo", repo)
if repo == "" {
continue
}
unit := SourceUnit{ID: repo, Kind: UnitDir}
if err := reporter.UnitOk(ctx, unit); err != nil {
ctx.Logger().Error(err, "failed to chunk")
return err
}
}
for _, repo := range s.conn.GetRepositories() {
ctx.Logger().Info("enumerating repos", "repo", repo)
if repo == "" {
continue
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -788,9 +788,6 @@ func (s *Source) cloneAndScanRepo(ctx context.Context, client *github.Client, re
}
defer os.RemoveAll(path)

// TODO: Can this be set once or does it need to be set on every iteration? Is |s.scanOptions| set every clone?
s.setScanOptions(s.conn.Base, s.conn.Head)

// Repo size is not collected for wikis.
var logger logr.Logger
if !strings.HasSuffix(repoURL, ".wiki.git") && repoInfo.size > 0 {
Expand All @@ -800,6 +797,9 @@ func (s *Source) cloneAndScanRepo(ctx context.Context, client *github.Client, re
}
logger.V(2).Info("scanning repo")

// TODO: Can this be set once or does it need to be set on every iteration? Is |s.scanOptions| set every clone?
s.setScanOptions(s.conn.Base, s.conn.Head)

start := time.Now()
if err = s.git.ScanRepo(ctx, repo, path, s.scanOptions, sources.ChanReporter{Ch: chunksChan}); err != nil {
return duration, fmt.Errorf("error scanning repo %s: %w", repoURL, err)
Expand Down
2 changes: 0 additions & 2 deletions pkg/sources/github/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ func (s *Source) cloneRepo(
if err != nil {
return "", nil, err
}

case *sourcespb.GitHub_GithubApp:
s.githubUser, s.githubToken, err = s.userAndToken(ctx, installationClient)
if err != nil {
Expand All @@ -86,7 +85,6 @@ func (s *Source) cloneRepo(
if err != nil {
return "", nil, err
}

case *sourcespb.GitHub_Token:
if err := s.getUserAndToken(ctx, repoURL, installationClient); err != nil {
return "", nil, fmt.Errorf("error getting token for repo %s: %w", repoURL, err)
Expand Down

0 comments on commit 8f9c202

Please sign in to comment.