From 658857a606cd082e53064bbb4632644dabb9d03d Mon Sep 17 00:00:00 2001 From: Richard Gomez <32133502+rgmz@users.noreply.github.com> Date: Thu, 8 Feb 2024 10:25:04 -0500 Subject: [PATCH] Update GitParse to handle quoted binary filenames (#2391) * fix(gitparse): quoted binary files * fix(gitparse): use bytes.Cut instead of regexp * fix lint warning --------- Co-authored-by: Zachary Rice --- pkg/gitparse/gitparse.go | 41 +++++++++++++++++++++++++---------- pkg/gitparse/gitparse_test.go | 11 +++++++--- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/pkg/gitparse/gitparse.go b/pkg/gitparse/gitparse.go index f54590f88ec7..5b4490272ac9 100644 --- a/pkg/gitparse/gitparse.go +++ b/pkg/gitparse/gitparse.go @@ -478,10 +478,17 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan case isBinaryLine(latestState, line): latestState = BinaryFileLine - currentDiff.PathB = pathFromBinaryLine(line) + path, ok := pathFromBinaryLine(line) + if !ok { + err = fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line) + ctx.Logger().Error(err, "Failed to parse binary file line") + latestState = ParseFailure + continue + } // Don't do anything if the file is deleted. (pathA has file path, pathB is /dev/null) - if currentDiff.PathB != "" { + if path != "" { + currentDiff.PathB = path currentDiff.IsBinary = true } case isFromFileLine(latestState, line): @@ -728,15 +735,27 @@ func isBinaryLine(latestState ParseState, line []byte) bool { } // Get the b/ file path. Ignoring the edge case of files having `and /b` in the name for simplicity. -func pathFromBinaryLine(line []byte) string { - logger := context.Background().Logger() - sbytes := bytes.Split(line, []byte(" and b/")) - if len(sbytes) != 2 { - logger.V(2).Info("Expected binary line to be in 'Binary files a/fileA and b/fileB differ' format.", "got", line) - return "" - } - bRaw := sbytes[1] - return string(bRaw[:len(bRaw)-8]) // drop the "b/" and " differ\n" +func pathFromBinaryLine(line []byte) (string, bool) { + if bytes.Contains(line, []byte("and /dev/null")) { + return "", true + } + + _, after, ok := bytes.Cut(line, []byte(" and b/")) + if ok { + // drop the " differ\n" + return string(after[:len(after)-8]), true + } + + // Edge case where the path is quoted. + // https://github.com/trufflesecurity/trufflehog/issues/2384 + _, after, ok = bytes.Cut(line, []byte(` and "b/`)) + if ok { + // drop the `" differ\n` + return string(after[:len(after)-9]), true + } + + // Unknown format. + return "", false } // --- a/internal/addrs/move_endpoint_module.go diff --git a/pkg/gitparse/gitparse_test.go b/pkg/gitparse/gitparse_test.go index 72ca23c06122..df2cfbfd26e4 100644 --- a/pkg/gitparse/gitparse_test.go +++ b/pkg/gitparse/gitparse_test.go @@ -591,12 +591,17 @@ func TestLineChecksNoStaged(t *testing.T) { func TestBinaryPathParse(t *testing.T) { cases := map[string]string{ - "Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig", - "Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf", + "Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "", + "Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig", + "Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf", + "Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ\n": "assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png", } for name, expected := range cases { - filename := pathFromBinaryLine([]byte(name)) + filename, ok := pathFromBinaryLine([]byte(name)) + if !ok { + t.Errorf("Failed to get path: %s", name) + } if filename != expected { t.Errorf("Expected: %s, Got: %s", expected, filename) }