From 28d079bdad93787452175c0c4a8506a475f13fb7 Mon Sep 17 00:00:00 2001 From: ahrav Date: Mon, 5 Feb 2024 06:53:08 -0800 Subject: [PATCH 1/3] use only the DetectorKey as a map field (#2374) --- pkg/engine/ahocorasick/ahocorasickcore.go | 3 +++ pkg/engine/engine.go | 8 ++++---- pkg/engine/engine_test.go | 24 +++++++++++------------ 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pkg/engine/ahocorasick/ahocorasickcore.go b/pkg/engine/ahocorasick/ahocorasickcore.go index b083715507db..e5eb3f60df59 100644 --- a/pkg/engine/ahocorasick/ahocorasickcore.go +++ b/pkg/engine/ahocorasick/ahocorasickcore.go @@ -23,6 +23,9 @@ type DetectorKey struct { customDetectorName string } +// Type returns the detector type of the key. +func (k DetectorKey) Type() detectorspb.DetectorType { return k.detectorType } + // AhoCorasickCore encapsulates the operations and data structures used for keyword matching via the // Aho-Corasick algorithm. It is responsible for constructing and managing the trie for efficient // substring searches, as well as mapping keywords to their associated detectors for rapid lookups. diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index eda8f3e4c0a4..3d822e4bb2e2 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -598,8 +598,8 @@ func (e *Engine) detectorWorker(ctx context.Context) { // by the same detector in the chunk. Exact matches on lookup indicate a duplicate secret for a detector // in that chunk - which is expected and not malicious. Those intra-detector dupes are still verified. type chunkSecretKey struct { - secret string - detectorInfo ahocorasick.DetectorInfo + secret string + detectorKey ahocorasick.DetectorKey } func likelyDuplicate(ctx context.Context, val chunkSecretKey, dupes map[chunkSecretKey]struct{}) bool { @@ -615,7 +615,7 @@ func likelyDuplicate(ctx context.Context, val chunkSecretKey, dupes map[chunkSec // If the detector type is the same, we don't need to compare the strings. 
// These are not duplicates, and should be verified. - if val.detectorInfo.Type() == dupeKey.detectorInfo.Type() { + if val.detectorKey.Type() == dupeKey.detectorKey.Type() { continue } @@ -674,7 +674,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) { // Ex: // - postman api key: PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r // - malicious detector "api key": qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r - key := chunkSecretKey{secret: string(val), detectorInfo: detector} + key := chunkSecretKey{secret: string(val), detectorKey: detector.Key} if _, ok := chunkSecrets[key]; ok { continue } diff --git a/pkg/engine/engine_test.go b/pkg/engine/engine_test.go index ea4b5807b4f5..39120d18c1cb 100644 --- a/pkg/engine/engine_test.go +++ b/pkg/engine/engine_test.go @@ -562,47 +562,47 @@ func TestLikelyDuplicate(t *testing.T) { }{ { name: "exact duplicate different detector", - val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}, + val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB}: {}, + {"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB.Key}: {}, }, expected: true, }, { name: "non-duplicate length outside range", - val: chunkSecretKey{"short", detectorA}, + val: chunkSecretKey{"short", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"muchlongerthanthevalstring", detectorB}: {}, + {"muchlongerthanthevalstring", detectorB.Key}: {}, }, expected: false, }, { name: "similar within threshold", - val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}, + val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - 
{"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB}: {}, + {"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB.Key}: {}, }, expected: true, }, { name: "similar outside threshold", - val: chunkSecretKey{"anotherkey", detectorA}, + val: chunkSecretKey{"anotherkey", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"completelydifferent", detectorB}: {}, + {"completelydifferent", detectorB.Key}: {}, }, expected: false, }, { name: "empty strings", - val: chunkSecretKey{"", detectorA}, - dupes: map[chunkSecretKey]struct{}{{"", detectorB}: {}}, + val: chunkSecretKey{"", detectorA.Key}, + dupes: map[chunkSecretKey]struct{}{{"", detectorB.Key}: {}}, expected: true, }, { name: "similar within threshold same detector", - val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}, + val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}: {}, + {"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}: {}, }, expected: false, }, From 905b7c3a01cea01747c41cced4ba85752e375c57 Mon Sep 17 00:00:00 2001 From: Dylan Ayrey Date: Mon, 5 Feb 2024 09:57:52 -0800 Subject: [PATCH 2/3] custom detector dogs (#2376) --- README.md | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8507980d9412..87eba07fea3e 100644 --- a/README.md +++ b/README.md @@ -469,11 +469,12 @@ status code, the secret is considered verified. 
```yaml # config.yaml detectors: - - name: hog detector + - name: HogTokenDetector keywords: - hog regex: - adjective: hogs are (\S+) + hogID: \b(HOG[0-9A-Z]{16})\b + hogToken: [^A-Za-z0-9+\/]{0,1}([A-Za-z0-9+\/]{40})[^A-Za-z0-9+\/]{0,1} verify: - endpoint: http://localhost:8000/ # unsafe must be set if the endpoint is HTTP @@ -482,6 +483,7 @@ detectors: - "Authorization: super secret authorization header" ``` + ``` $ trufflehog filesystem /tmp --config config.yaml --only-verified 🐷🔑🐷 TruffleHog. Unearth your secrets. 🐷🔑🐷 @@ -489,9 +491,19 @@ $ trufflehog filesystem /tmp --config config.yaml --only-verified Found verified result 🐷🔑 Detector Type: CustomRegex Decoder Type: PLAIN -Raw result: hogs are cool +Raw result: HOGAAIUNNWHAHJJWUQYR File: /tmp/hog-facts.txt ``` +Data structure sent to the custom verification server: + +``` +{ + "HogTokenDetector": { + "HogID": ["HOGAAIUNNWHAHJJWUQYR"], + "HogSecret": ["sD9vzqdSsAOxntjAJ/qZ9sw+8PvEYg0r7D1Hhh0C"], + } +} +``` ## Verification Server Example (Python) @@ -523,8 +535,8 @@ class Verifier(BaseHTTPRequestHandler): request = json.loads(self.rfile.read(length)) self.log_message("%s", request) - # check the match - if request['hog detector']['adjective'][-1] == 'cool': + # check the match; you'll need to implement validateTokens, which takes arrays of IDs and secrets + if not validateTokens(request['HogTokenDetector']['hogID'], request['HogTokenDetector']['hogSecret']): self.send_response(200) self.end_headers() else: From 135cc3eb6945d4ca56c432328490b4860462a08a Mon Sep 17 00:00:00 2001 From: ahrav Date: Mon, 5 Feb 2024 10:43:55 -0800 Subject: [PATCH 3/3] [fixup] - correctly use the buffered file writer (#2373) * correctly use the buffered file writer * use value from source * reorder fields * use only the DetectorKey as a map field * address comments and use factory function * fix optional params * remove commented out code --- pkg/gitparse/gitparse.go | 47 +++++++++++++++++++++++++++------------- 
pkg/sources/git/git.go | 5 ++--- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/pkg/gitparse/gitparse.go b/pkg/gitparse/gitparse.go index 14792efb08c1..4a373124580d 100644 --- a/pkg/gitparse/gitparse.go +++ b/pkg/gitparse/gitparse.go @@ -16,6 +16,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/context" + bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer" ) const ( @@ -100,10 +101,11 @@ func (b *buffer) String() (string, error) { return b.Buffer.String(), nil } // The use of contentWriter enables the management of diff data either in memory or on disk, // based on its size, optimizing resource usage and performance. type Diff struct { - PathB string - LineStart int + PathB string + LineStart int + IsBinary bool + contentWriter contentWriter - IsBinary bool } type diffOption func(*Diff) @@ -111,10 +113,14 @@ type diffOption func(*Diff) // withPathB sets the PathB option. func withPathB(pathB string) diffOption { return func(d *Diff) { d.PathB = pathB } } +// withCustomContentWriter sets the contentWriter used by the Diff. +func withCustomContentWriter(cr contentWriter) diffOption { + return func(d *Diff) { d.contentWriter = cr } +} + // NewDiff creates a new Diff with a threshold. func NewDiff(opts ...diffOption) *Diff { diff := new(Diff) - diff.contentWriter = newBuffer() for _, opt := range opts { opt(diff) } @@ -203,7 +209,8 @@ type Parser struct { maxDiffSize int maxCommitSize int dateFormat string - contentWriter contentWriter + + useCustomContentWriter bool } type ParseState int @@ -250,11 +257,9 @@ func (state ParseState) String() string { }[state] } -// WithContentWriter sets the ContentWriter for the Parser. -func WithContentWriter(writer contentWriter) Option { - return func(parser *Parser) { - parser.contentWriter = writer - } +// UseCustomContentWriter sets useCustomContentWriter option. 
+func UseCustomContentWriter() Option { + return func(parser *Parser) { parser.useCustomContentWriter = true } } // WithMaxDiffSize sets maxDiffSize option. Diffs larger than maxDiffSize will @@ -283,7 +288,6 @@ func NewParser(options ...Option) *Parser { dateFormat: defaultDateFormat, maxDiffSize: defaultMaxDiffSize, maxCommitSize: defaultMaxCommitSize, - contentWriter: newBuffer(), } for _, option := range options { option(parser) @@ -387,7 +391,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch totalLogSize int ) var latestState = Initial - currentDiff := NewDiff() + + diff := func(opts ...diffOption) *Diff { + opts = append(opts, withCustomContentWriter(newBuffer())) + return NewDiff(opts...) + } + if c.useCustomContentWriter { + diff = func(opts ...diffOption) *Diff { + opts = append(opts, withCustomContentWriter(bufferedfilewriter.New())) + return NewDiff(opts...) + } + } + currentDiff := diff() defer common.RecoverWithExit(ctx) defer close(commitChan) @@ -430,7 +445,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch totalLogSize += currentCommit.Size } // Create a new currentDiff and currentCommit - currentDiff = NewDiff() + currentDiff = diff() + // currentDiff = NewDiff(withCustomContentWriter(c.contentWriter())) currentCommit = &Commit{Message: strings.Builder{}} // Check that the commit line contains a hash and set it. 
if len(line) >= 47 { @@ -498,7 +514,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch currentCommit.Message.WriteString(oldCommit.Message.String()) } } - currentDiff = NewDiff() + currentDiff = diff() + // currentDiff = NewDiff(withCustomContentWriter(c.contentWriter())) case isModeLine(isStaged, latestState, line): latestState = ModeLine // NoOp @@ -538,7 +555,7 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch } currentCommit.Diffs = append(currentCommit.Diffs, *currentDiff) } - currentDiff = NewDiff(withPathB(currentDiff.PathB)) + currentDiff = diff(withPathB(currentDiff.PathB)) words := bytes.Split(line, []byte(" ")) if len(words) >= 3 { diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index 9843b860cf6c..7a7de1ddb168 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -34,7 +34,6 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" "github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer" "github.com/trufflesecurity/trufflehog/v3/pkg/sources" - bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer" ) const SourceType = sourcespb.SourceType_SOURCE_TYPE_GIT @@ -99,7 +98,7 @@ type Config struct { func NewGit(config *Config) *Git { var parser *gitparse.Parser if config.UseCustomContentWriter { - parser = gitparse.NewParser(gitparse.WithContentWriter(bufferedfilewriter.New())) + parser = gitparse.NewParser(gitparse.UseCustomContentWriter()) } else { parser = gitparse.NewParser() } @@ -522,7 +521,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string repoCtx = context.WithValue(ctx, "repo", path) } - commitChan, err := gitparse.NewParser().RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare) + commitChan, err := s.parser.RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, 
scanOptions.Bare) if err != nil { return err }