From 2e2a928e75ef1c3e1439303def1f4acac763285f Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Tue, 6 Feb 2024 09:50:36 -0500 Subject: [PATCH] fix(gitparse): quoted binary files --- pkg/gitparse/gitparse.go | 38 +++++++++++++++++++++++++---------- pkg/gitparse/gitparse_test.go | 11 +++++++--- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/pkg/gitparse/gitparse.go b/pkg/gitparse/gitparse.go index 1462b5c9397a4..380617f276620 100644 --- a/pkg/gitparse/gitparse.go +++ b/pkg/gitparse/gitparse.go @@ -12,6 +12,8 @@ import ( "strings" "time" + regexp "github.com/wasilibs/go-re2" + "github.com/go-logr/logr" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -458,10 +460,16 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan case isBinaryLine(latestState, line): latestState = BinaryFileLine - currentDiff.PathB = pathFromBinaryLine(line) + path, err := pathFromBinaryLine(line) + if err != nil { + ctx.Logger().Error(err, "Failed to parse binary file line") + latestState = ParseFailure + continue + } // Don't do anything if the file is deleted. (pathA has file path, pathB is /dev/null) - if currentDiff.PathB != "" { + if path != "" { + currentDiff.PathB = path currentDiff.IsBinary = true } case isFromFileLine(latestState, line): @@ -707,16 +715,24 @@ func isBinaryLine(latestState ParseState, line []byte) bool { return false } +var binaryPathPat = regexp.MustCompile(`Binary files .+ and (?:/dev/null|b/(.+)|"b/(.+)") differ`) + // Get the b/ file path. Ignoring the edge case of files having `and /b` in the name for simplicity. -func pathFromBinaryLine(line []byte) string { - logger := context.Background().Logger() - sbytes := bytes.Split(line, []byte(" and b/")) - if len(sbytes) != 2 { - logger.V(2).Info("Expected binary line to be in 'Binary files a/fileA and b/fileB differ' format.", "got", line) - return "" - } - bRaw := sbytes[1] - return string(bRaw[:len(bRaw)-8]) // drop the "b/" and " differ\n" +func pathFromBinaryLine(line []byte) (string, error) { + matches := binaryPathPat.FindSubmatch(line) + if len(matches) == 0 { + err := fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line) + return "", err + } + + var path string + for _, match := range matches[1:] { // the first match is the entire input + if len(match) > 0 { + path = string(match) + break + } + } + return path, nil } // --- a/internal/addrs/move_endpoint_module.go diff --git a/pkg/gitparse/gitparse_test.go b/pkg/gitparse/gitparse_test.go index 505d6db9d7f7e..3fee460889484 100644 --- a/pkg/gitparse/gitparse_test.go +++ b/pkg/gitparse/gitparse_test.go @@ -591,12 +591,17 @@ func TestLineChecksNoStaged(t *testing.T) { func TestBinaryPathParse(t *testing.T) { cases := map[string]string{ - "Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig", - "Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf", + "Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "", + "Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig", + "Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf", + "Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ": "assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png", } for name, expected := range cases { - filename := pathFromBinaryLine([]byte(name)) + filename, err := pathFromBinaryLine([]byte(name)) + if err != nil { + t.Errorf("Got unexpected error: %s", err) + } if filename != expected { t.Errorf("Expected: %s, Got: %s", expected, filename) }