From 81d58594e5850ae37e0482d79698767f9f095d13 Mon Sep 17 00:00:00 2001 From: 0x1 <13666360+0x1@users.noreply.github.com> Date: Fri, 10 Jan 2025 09:35:18 -0500 Subject: [PATCH 1/2] Update log for large s3 file (#3835) * update log for large s3 file * key and size already in ctx --- pkg/sources/s3/s3.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sources/s3/s3.go b/pkg/sources/s3/s3.go index ec16b7d81fc9..4c13b988e1d3 100644 --- a/pkg/sources/s3/s3.go +++ b/pkg/sources/s3/s3.go @@ -457,7 +457,7 @@ func (s *Source) pageChunker( // Ignore large files. if *obj.Size > s.maxObjectSize { - ctx.Logger().V(5).Info("Skipping %d byte file (over maxObjectSize limit)") + ctx.Logger().V(5).Info("Skipping large file", "max_object_size", s.maxObjectSize) s.metricsCollector.RecordObjectSkipped(metadata.bucket, "size_limit", float64(*obj.Size)) if err := s.checkpointer.UpdateObjectCompletion(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil { ctx.Logger().Error(err, "could not update progress for large file") From 6d1c59f018be0ea3b37aec7fc4b12815c91ab69a Mon Sep 17 00:00:00 2001 From: Kashif Khan <70996046+kashifkhan0771@users.noreply.github.com> Date: Fri, 10 Jan 2025 22:25:20 +0500 Subject: [PATCH 2/2] oss-87: added new sanity detector (#3836) --- pkg/detectors/sanity/sanity.go | 105 +++++++++++++++ .../sanity/sanity_integration_test.go | 127 ++++++++++++++++++ pkg/detectors/sanity/sanity_test.go | 86 ++++++++++++ pkg/engine/defaults/defaults.go | 2 + pkg/pb/detectorspb/detectors.pb.go | 16 ++- proto/detectors.proto | 1 + 6 files changed, 331 insertions(+), 6 deletions(-) create mode 100644 pkg/detectors/sanity/sanity.go create mode 100644 pkg/detectors/sanity/sanity_integration_test.go create mode 100644 pkg/detectors/sanity/sanity_test.go diff --git a/pkg/detectors/sanity/sanity.go b/pkg/detectors/sanity/sanity.go new file mode 100644 index 000000000000..66e2d503b0ea --- /dev/null +++ b/pkg/detectors/sanity/sanity.go @@ -0,0 +1,105 @@ 
+package sanity + +import ( + "context" + "fmt" + "io" + "net/http" + + regexp "github.com/wasilibs/go-re2" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type Scanner struct { + client *http.Client +} + +// Ensure the Scanner satisfies the interface at compile time. +var _ detectors.Detector = (*Scanner)(nil) + +var ( + authTokenPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sanity"}) + `\b(sk[A-Za-z0-9]{79})\b`) +) + +// Keywords are used for efficiently pre-filtering chunks. +// Use identifiers in the secret preferably, or the provider name. +func (s Scanner) Keywords() []string { + return []string{"sanity"} +} + +func (s Scanner) Description() string { + return "Sanity is the modern CMS that transforms content into a competitive advantage. Customize, collaborate, and scale your digital experiences seamlessly." +} + +// FromData will find and optionally verify Sanity auth token secrets in a given set of bytes. 
+func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { + dataStr := string(data) + + // uniqueMatches will hold unique match values and ensure we only process unique matches found in the data string + var uniqueMatches = make(map[string]struct{}) + + for _, match := range authTokenPat.FindAllStringSubmatch(dataStr, -1) { + uniqueMatches[match[1]] = struct{}{} + } + + for match := range uniqueMatches { + s1 := detectors.Result{ + DetectorType: detectorspb.DetectorType_Sanity, + Raw: []byte(match), + } + + if verify { + if s.client == nil { + s.client = common.SaneHttpClient() + } + + isVerified, verificationErr := verifySanityAuthToken(ctx, s.client, match) + s1.Verified = isVerified + s1.SetVerificationError(verificationErr) + } + + results = append(results, s1) + } + + return results, nil +} + +func (s Scanner) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_Sanity +} + +/* +verifySanityAuthToken verifies if the passed matched auth token for sanity is active or not. 
+auth docs: https://www.sanity.io/docs/http-auth +api docs: https://www.sanity.io/docs/reference/http/access#tag/permissions/GET/vX/access/permissions/me +*/ +func verifySanityAuthToken(ctx context.Context, client *http.Client, authToken string) (bool, error) { + req, err := http.NewRequestWithContext(ctx, "GET", "https://api.sanity.io/vX/access/permissions/me", http.NoBody) + if err != nil { + return false, err + } + + // set the required auth header + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", authToken)) + + resp, err := client.Do(req) + if err != nil { + return false, err + } + defer func() { + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + }() + + switch resp.StatusCode { + case http.StatusOK: + return true, nil + case http.StatusUnauthorized, http.StatusForbidden: + return false, nil + default: + return false, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } +} diff --git a/pkg/detectors/sanity/sanity_integration_test.go b/pkg/detectors/sanity/sanity_integration_test.go new file mode 100644 index 000000000000..3e76f40db2ba --- /dev/null +++ b/pkg/detectors/sanity/sanity_integration_test.go @@ -0,0 +1,127 @@ +//go:build detectors +// +build detectors + +package sanity + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestSanity_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("SANITY_AUTHTOKEN") + inactiveSecret := testSecrets.MustGetField("SANITY_AUTHTOKEN_INACTIVE") + + type args struct 
{ + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: ctx, + data: []byte(fmt.Sprintf("You can find a sanity apikey %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Sanity, + Verified: true, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: ctx, + data: []byte(fmt.Sprintf("You can find a sanity apikey secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Sanity, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Sanity.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("Sanity.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func 
BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/sanity/sanity_test.go b/pkg/detectors/sanity/sanity_test.go new file mode 100644 index 000000000000..8fc25b2d2c9c --- /dev/null +++ b/pkg/detectors/sanity/sanity_test.go @@ -0,0 +1,86 @@ +package sanity + +import ( + "context" + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) + +var ( + validPattern = `Information used in API calls for sanity +Variable name | Initial Value +sanity auth token |skCrESWvpXysjmfakeaMGdMecRnw2mTVURqlVABArKApL1j4SLUhFAKEEizjp7ymM8pebv0ScqyqelbLD +networkId |L_646829496481117067 +serial |` + + invalidPattern = "skCr_SWvpXysjmfakeaMGdMecRnw$mTVURqlVABArKApL1j4SLUhFAKEEizjp7ymM8pebv0ScqyqelbLD" +) + +func TestSanity_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + + tests := []struct { + name string + input string + want []string + }{ + { + name: "valid pattern", + input: validPattern, + want: []string{"skCrESWvpXysjmfakeaMGdMecRnw2mTVURqlVABArKApL1j4SLUhFAKEEizjp7ymM8pebv0ScqyqelbLD"}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("sanity = '%s'", invalidPattern), + want: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", 
err) + return + } + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } + }) + } +} diff --git a/pkg/engine/defaults/defaults.go b/pkg/engine/defaults/defaults.go index d2acbdca4793..2591235829a4 100644 --- a/pkg/engine/defaults/defaults.go +++ b/pkg/engine/defaults/defaults.go @@ -601,6 +601,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/salesflare" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/salesforce" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/salesmate" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/sanity" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/satismeterprojectkey" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/satismeterwritekey" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/saucelabs" @@ -1444,6 +1445,7 @@ func buildDetectorList() []detectors.Detector { &salesflare.Scanner{}, &salesforce.Scanner{}, &salesmate.Scanner{}, + &sanity.Scanner{}, &satismeterprojectkey.Scanner{}, &satismeterwritekey.Scanner{}, &saucelabs.Scanner{}, diff --git a/pkg/pb/detectorspb/detectors.pb.go b/pkg/pb/detectorspb/detectors.pb.go index 68664605aa32..6d833b800d27 100644 --- a/pkg/pb/detectorspb/detectors.pb.go +++ b/pkg/pb/detectorspb/detectors.pb.go @@ -1116,6 +1116,7 @@ const ( DetectorType_Flexport DetectorType = 1009 DetectorType_TwitchAccessToken DetectorType = 1010 DetectorType_TwilioApiKey 
DetectorType = 1011 + DetectorType_Sanity DetectorType = 1012 ) // Enum value maps for DetectorType. @@ -2129,6 +2130,7 @@ var ( 1009: "Flexport", 1010: "TwitchAccessToken", 1011: "TwilioApiKey", + 1012: "Sanity", } DetectorType_value = map[string]int32{ "Alibaba": 0, @@ -3139,6 +3141,7 @@ var ( "Flexport": 1009, "TwitchAccessToken": 1010, "TwilioApiKey": 1011, + "Sanity": 1012, } ) @@ -3592,7 +3595,7 @@ var file_detectors_proto_rawDesc = []byte{ 0x4c, 0x41, 0x49, 0x4e, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x42, 0x41, 0x53, 0x45, 0x36, 0x34, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x45, 0x53, 0x43, 0x41, 0x50, 0x45, 0x44, 0x5f, 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, - 0x10, 0x04, 0x2a, 0xaf, 0x81, 0x01, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x10, 0x04, 0x2a, 0xbc, 0x81, 0x01, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x41, 0x4d, 0x51, 0x50, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x41, 0x57, 0x53, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x7a, 0x75, 0x72, 0x65, 0x10, 0x03, 0x12, @@ -4627,11 +4630,12 @@ var file_detectors_proto_rawDesc = []byte{ 0x6c, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x10, 0xf1, 0x07, 0x12, 0x16, 0x0a, 0x11, 0x54, 0x77, 0x69, 0x74, 0x63, 0x68, 0x41, 0x63, 0x63, 0x65, 0x73, 0x73, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x10, 0xf2, 0x07, 0x12, 0x11, 0x0a, 0x0c, 0x54, 0x77, 0x69, 0x6c, 0x69, 0x6f, 0x41, 0x70, 0x69, 0x4b, - 0x65, 0x79, 0x10, 0xf3, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, - 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, - 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, - 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, - 0x72, 0x73, 0x70, 0x62, 
0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x65, 0x79, 0x10, 0xf3, 0x07, 0x12, 0x0b, 0x0a, 0x06, 0x53, 0x61, 0x6e, 0x69, 0x74, 0x79, 0x10, + 0xf4, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, + 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, + 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, + 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/proto/detectors.proto b/proto/detectors.proto index ba3f8ddfd431..656abe6562a5 100644 --- a/proto/detectors.proto +++ b/proto/detectors.proto @@ -1021,6 +1021,7 @@ enum DetectorType { Flexport = 1009; TwitchAccessToken = 1010; TwilioApiKey = 1011; + Sanity = 1012; } message Result {