From e37b84cbdad84a842ae5789ca108ab0f4a80f8d8 Mon Sep 17 00:00:00 2001
From: Richard Gomez
Date: Tue, 6 Feb 2024 13:36:29 -0500
Subject: [PATCH] fix(gitparse): use bytes.Cut instead of regexp

---
 pkg/gitparse/gitparse.go      | 39 +++++++++++++++++++----------------
 pkg/gitparse/gitparse_test.go | 14 ++++++-------
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/pkg/gitparse/gitparse.go b/pkg/gitparse/gitparse.go
index 380617f27662..c558c897e7d1 100644
--- a/pkg/gitparse/gitparse.go
+++ b/pkg/gitparse/gitparse.go
@@ -12,8 +12,6 @@ import (
 	"strings"
 	"time"
 
-	regexp "github.com/wasilibs/go-re2"
-
 	"github.com/go-logr/logr"
 
 	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
@@ -460,8 +458,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
 		case isBinaryLine(latestState, line):
 			latestState = BinaryFileLine
 
-			path, err := pathFromBinaryLine(line)
-			if err != nil {
+			path, ok := pathFromBinaryLine(line)
+			if !ok {
+				err = fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line)
 				ctx.Logger().Error(err, "Failed to parse binary file line")
 				latestState = ParseFailure
 				continue
@@ -715,24 +714,28 @@ func isBinaryLine(latestState ParseState, line []byte) bool {
 	return false
 }
 
-var binaryPathPat = regexp.MustCompile(`Binary files .+ and (?:/dev/null|b/(.+)|"b/(.+)") differ`)
-
 // Get the b/ file path. Ignoring the edge case of files having `and /b` in the name for simplicity.
-func pathFromBinaryLine(line []byte) (string, error) {
-	matches := binaryPathPat.FindSubmatch(line)
-	if len(matches) == 0 {
-		err := fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line)
-		return "", err
+func pathFromBinaryLine(line []byte) (string, bool) {
+	if bytes.Contains(line, []byte("and /dev/null")) {
+		return "", true
 	}
 
-	var path string
-	for _, match := range matches[1:] { // the first match is the entire input
-		if len(match) > 0 {
-			path = string(match)
-			break
-		}
+	// The b/ path is either bare or, in the edge case below, double-quoted.
+	// https://github.com/trufflesecurity/trufflehog/issues/2384
+	_, after, ok := bytes.Cut(line, []byte(" and b/"))
+	suffix := []byte(" differ")
+	if !ok {
+		_, after, ok = bytes.Cut(line, []byte(` and "b/`))
+		suffix = []byte(`" differ`)
+	}
+
+	after = bytes.TrimSuffix(after, []byte("\n"))
+	if !ok || !bytes.HasSuffix(after, suffix) {
+		return "", false
 	}
-	return path, nil
+
+	// Measure the suffix instead of assuming a fixed width, so a short line or a missing newline cannot panic or truncate the path.
+	return string(after[:len(after)-len(suffix)]), true
 }
 
 // --- a/internal/addrs/move_endpoint_module.go
diff --git a/pkg/gitparse/gitparse_test.go b/pkg/gitparse/gitparse_test.go
index 3fee46088948..e7b93ca6deaa 100644
--- a/pkg/gitparse/gitparse_test.go
+++ b/pkg/gitparse/gitparse_test.go
@@ -591,16 +591,16 @@ func TestLineChecksNoStaged(t *testing.T) {
 
 func TestBinaryPathParse(t *testing.T) {
 	cases := map[string]string{
-		"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",
-		"Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig",
-		"Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf",
-		"Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ": "assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png",
+		"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",
+		"Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig",
+		"Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf",
+		"Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ\n": "assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png",
 	}
 
 	for name, expected := range cases {
-		filename, err := pathFromBinaryLine([]byte(name))
-		if err != nil {
-			t.Errorf("Got unexpected error: %s", err)
+		filename, ok := pathFromBinaryLine([]byte(name))
+		if !ok {
+			t.Errorf("Failed to get path: %s", name)
 		}
 		if filename != expected {
 			t.Errorf("Expected: %s, Got: %s", expected, filename)