Skip to content

Commit

Permalink
feat(gitparse): track commit refs
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz authored and Richard Gomez committed Apr 13, 2024
1 parent c179f00 commit c9a7acd
Show file tree
Hide file tree
Showing 11 changed files with 505 additions and 384 deletions.
7 changes: 4 additions & 3 deletions hack/snifftest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,16 @@ func main() {
SkipBinaries: true,
SkipArchives: false,
Concurrency: runtime.NumCPU(),
SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
SourceMetadataFunc: func(repository, commit, ref, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
return &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Git{
Git: &source_metadatapb.Git{
Repository: repository,
Commit: commit,
File: file,
CommitRef: ref,
Email: email,
Repository: repository,
Timestamp: timestamp,
File: file,
},
},
}
Expand Down
57 changes: 45 additions & 12 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,12 @@ func (d *Diff) finalize() error {

// Commit contains commit header info and diffs.
// It holds the header fields the parser reads from `git log` output for a
// single commit (hash, source ref, author, date, message).
type Commit struct {
Hash string
Author string
Date time.Time
Message strings.Builder
Size int // in bytes
// SourceRef is the ref text that follows the hash on the `commit` line,
// present because `git log` is invoked with `--source`.
SourceRef string
// Hash is the 40-character commit hash taken from the `commit` line.
Hash string
Author string
Date time.Time
Message strings.Builder
Size int // in bytes

hasDiffs bool
}
Expand Down Expand Up @@ -209,13 +210,22 @@ func NewParser(options ...Option) *Parser {
// RepoPath parses the output of the `git log` command for the `source` path.
// The Diff chan will return diffs in the order they are parsed from the log.
func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool, excludedGlobs []string, isBare bool) (chan *Diff, error) {
args := []string{"-C", source, "log", "-p", "--full-history", "--date=format:%a %b %d %H:%M:%S %Y %z"}
args := []string{
"-C", source,
"log",
"-p", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
"--full-history", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---full-history
"--date=format:%a %b %d %H:%M:%S %Y %z", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---dateltformatgt
"--source", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
}
if abbreviatedLog {
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---diff-filterACDMRTUXB82308203
args = append(args, "--diff-filter=AM")
}
if head != "" {
args = append(args, head)
} else {
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---all
args = append(args, "--all")
}
for _, glob := range excludedGlobs {
Expand Down Expand Up @@ -302,10 +312,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
outReader := bufio.NewReader(stdOut)
var (
currentCommit *Commit

totalLogSize int
totalLogSize int
latestState = Initial
)
var latestState = Initial

diff := func(c *Commit, opts ...diffOption) *Diff {
opts = append(opts, withCustomContentWriter(bufferwriter.New(ctx)))
Expand Down Expand Up @@ -365,10 +374,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
// Create a new currentDiff and currentCommit
currentCommit = &Commit{Message: strings.Builder{}}
currentDiff = diff(currentCommit)
// Check that the commit line contains a hash and set it.
if len(line) >= 47 {
currentCommit.Hash = string(line[7:47])

hash, ref := parseCommitLine(line)
if hash == nil || ref == nil {
ctx.Logger().Error(
fmt.Errorf(`expected line to match 'commit <hash> <ref>', got "%s"`, line),
"Failed to parse CommitLine")
latestState = ParseFailure
continue
}

currentCommit.Hash = string(hash)
currentCommit.SourceRef = string(ref)
case isMergeLine(isStaged, latestState, line):
latestState = MergeLine
case isAuthorLine(isStaged, latestState, line):
Expand Down Expand Up @@ -566,6 +583,22 @@ func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
return false
}

// parseCommitLine extracts the commit hash and, when present, the source ref
// from a `git log --source` commit line.
//
// The expected layout is `commit <40-char hash>`, optionally followed by one
// separator byte and a ref name, then an optional line terminator:
//
//	commit e5575cd6f2d21d3a1a604287c7bf4a7eab2266e0\n
//	commit 2dbbb28727c7c2954438666dafba57bb8c714d3b refs/heads/fix/github-enterprise-gist\n
//
// hash is nil when the line is shorter than 47 bytes. ref is nil when the line
// is 48 bytes or shorter. Both results are sub-slices of line; nothing is
// copied.
func parseCommitLine(line []byte) (hash []byte, ref []byte) {
	const (
		hashStart = 7  // len("commit ")
		hashEnd   = 47 // hashStart + 40 hex characters
		refStart  = 48 // one separator byte after the hash
	)

	if len(line) >= hashEnd {
		hash = line[hashStart:hashEnd]
	}

	if len(line) > refStart {
		end := len(line)
		// Strip the line terminator only if it is actually there. The final
		// line of a stream may arrive without a trailing newline, and blindly
		// dropping the last byte would truncate the ref name.
		if line[end-1] == '\n' {
			end--
			if end > refStart && line[end-1] == '\r' {
				end--
			}
		}
		ref = line[refStart:end]
	}

	return hash, ref
}

// Author: Bill Rich <[email protected]>
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {
Expand Down
17 changes: 17 additions & 0 deletions pkg/gitparse/gitparse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,23 @@ func TestLineChecksNoStaged(t *testing.T) {
}
}

// Test_parseCommitLine checks that parseCommitLine splits a commit line that
// carries a ref into the expected hash and ref name.
func Test_parseCommitLine(t *testing.T) {
	tests := []struct {
		input    string
		wantHash string
		wantRef  string
	}{
		{
			input:    "commit 198c63cb8212a99cc4352bc72f25e5444a786291 refs/heads/main\n",
			wantHash: "198c63cb8212a99cc4352bc72f25e5444a786291",
			wantRef:  "refs/heads/main",
		},
		{
			input:    "commit e76dfb98ab9001daa869191b6aebe8cf4cd3b22a refs/remotes/origin/debug/aws-logging\n",
			wantHash: "e76dfb98ab9001daa869191b6aebe8cf4cd3b22a",
			wantRef:  "refs/remotes/origin/debug/aws-logging",
		},
	}

	for _, tc := range tests {
		gotHash, gotRef := parseCommitLine([]byte(tc.input))
		if tc.wantHash != string(gotHash) {
			t.Errorf("Expected: %s, Got: %s", tc.wantHash, gotHash)
		}
		if tc.wantRef != string(gotRef) {
			t.Errorf("Expected: %s, Got: %s", tc.wantRef, gotRef)
		}
	}
}

func TestBinaryPathParse(t *testing.T) {
cases := map[string]string{
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",
Expand Down
27 changes: 27 additions & 0 deletions pkg/output/plain.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,33 @@ func (p *PlainPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata)

for _, data := range meta {
for k, v := range data {
// Only print Git commit refs when they're associated with a pull/merge request.
// Otherwise, this information is not useful.
if strings.EqualFold(k, "Commit_ref") {
ref, ok := v.(string)
if !ok {
continue
}

var prNum string
if strings.HasPrefix(ref, "refs/heads/trufflehog/pull/") {
prNum = ref[27:]
} else if strings.HasPrefix(ref, "refs/heads/trufflehog/merge-requests/") {
prNum = ref[37:]
}

if prNum == "" {
continue
}

k = "Pull Request"
if strings.HasSuffix(prNum, "/head") {
v = prNum[:len(prNum)-5]
} else if strings.HasSuffix(prNum, "/merge") {
v = prNum[:len(prNum)-6]
}
}

aggregateDataKeys = append(aggregateDataKeys, k)
aggregateData[k] = v
}
Expand Down
Loading

0 comments on commit c9a7acd

Please sign in to comment.