Skip to content

Commit

Permalink
feat(decoder): add percent/url (#52)
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz authored Dec 27, 2024
1 parent bdd0853 commit 64f0518
Show file tree
Hide file tree
Showing 8 changed files with 1,266 additions and 1,058 deletions.
5 changes: 3 additions & 2 deletions pkg/decoders/decoders.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ func DefaultDecoders() []Decoder {
return []Decoder{
// UTF8 must be first for duplicate detection
&UTF8{},
&Base64{},
&UTF16{},
&EscapedUnicode{},
&HtmlEntity{},
&Percent{},
&Base64{},
&UTF16{},
}
}

Expand Down
149 changes: 149 additions & 0 deletions pkg/decoders/percent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package decoders

import (
"bytes"
"fmt"
"regexp"
"sync"

ahocorasick "github.com/BobuSumisu/aho-corasick"
"github.com/go-logr/logr"

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

// Percent is a decoder for percent-encoded (URL-encoded) characters,
// e.g. `%2F` for `/`. It is an empty struct and carries no per-instance state.
// https://developer.mozilla.org/en-US/docs/Glossary/Percent-encoding
// https://en.wikipedia.org/wiki/Percent-encoding
type Percent struct{}

var (
// Compile-time assertion that *Percent satisfies the Decoder interface.
_ Decoder = (*Percent)(nil)

// percentOnce wraps the construction of the lookup structures below in init.
percentOnce sync.Once
// percentTrie is an Aho-Corasick trie built from every known percent
// encoding. FromChunk uses it as a pre-filter to skip chunks that contain
// none of them.
percentTrie *ahocorasick.Trie
// percentEncodingToChar maps a percent encoding (e.g. "%2F") to the literal
// character it represents (e.g. "/"). It is populated in init.
percentEncodingToChar = map[string]string{}
)

// init populates percentEncodingToChar (encoding -> literal character) and
// builds percentTrie, the Aho-Corasick trie over every known encoding that
// FromChunk uses to pre-filter potential matches.
func init() {
	percentOnce.Do(func() {
		// Literal character -> accepted percent-encoded spellings. Encodings
		// whose hex digits contain a letter are listed in both upper and
		// lower case.
		encodingsByChar := map[string][]string{
			"!": {"%21"},
			"#": {"%23"},
			"$": {"%24"},
			"%": {"%25"},
			"&": {"%26"},
			"'": {"%27"},
			"(": {"%28"},
			")": {"%29"},
			"*": {"%2A", "%2a"},
			"+": {"%2B", "%2b"},
			",": {"%2C", "%2c"},
			"/": {"%2F", "%2f"},
			":": {"%3A", "%3a"},
			";": {"%3B", "%3b"},
			"=": {"%3D", "%3d"},
			"?": {"%3F", "%3f"},
			"@": {"%40"},
			"[": {"%5B", "%5b"},
			"]": {"%5D", "%5d"},
			" ": {"%20"}, // Space is percent encoded as well.
			`"`: {"%22"}, // Double quote.
			"<": {"%3C", "%3c"},
			">": {"%3E", "%3e"},
			`\`: {"%5C", "%5c"},
			"^": {"%5E", "%5e"},
			"`": {"%60"},
			"{": {"%7B", "%7b"},
			"|": {"%7C", "%7c"},
			"}": {"%7D", "%7d"},
		}

		var patterns []string
		for literal, variants := range encodingsByChar {
			for i := range variants {
				percentEncodingToChar[variants[i]] = literal
			}
			patterns = append(patterns, variants...)
		}

		percentTrie = ahocorasick.NewTrieBuilder().AddStrings(patterns).Build()
	})
}

// Type returns the decoder type identifier for this decoder,
// detectorspb.DecoderType_PERCENT.
func (d *Percent) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_PERCENT
}

// FromChunk decodes percent-encoded sequences found in chunk.Data.
//
// It returns nil when chunk is nil, when its data is empty, or when the data
// contains nothing that looks percent-encoded. Otherwise it returns a
// DecodableChunk wrapping a new chunk that carries the decoded data together
// with the source fields copied from the input chunk. The input chunk's data
// is never modified.
func (d *Percent) FromChunk(ctx context.Context, chunk *sources.Chunk) *DecodableChunk {
	if chunk == nil || len(chunk.Data) == 0 {
		return nil
	}
	// Cheap pre-filter: bail out unless at least one known encoding is present.
	if percentTrie.MatchFirst(chunk.Data) == nil {
		return nil
	}

	log := ctx.Logger().WithName("decoders.percent")
	// Work on a private copy; necessary to avoid data races.
	data := bytes.Clone(chunk.Data)

	if !percentEncodedPat.Match(data) {
		return nil
	}
	data = decoderPercent(log, data)

	decodedChunk := &sources.Chunk{
		Data:           data,
		SourceName:     chunk.SourceName,
		SourceID:       chunk.SourceID,
		JobID:          chunk.JobID,
		SecretID:       chunk.SecretID,
		SourceMetadata: chunk.SourceMetadata,
		SourceType:     chunk.SourceType,
		Verify:         chunk.Verify,
	}
	return &DecodableChunk{
		DecoderType: d.Type(),
		Chunk:       decodedChunk,
	}
}

// percentEncodedPat matches a single percent-encoded byte: a `%` followed by
// two hexadecimal digits, case-insensitively. For example, `%21` encodes `!`.
var percentEncodedPat = regexp.MustCompile(`(?i)%[a-f0-9]{2}`)

// decoderPercent returns a new slice in which every percent-encoded sequence
// listed in percentEncodingToChar is replaced by the character it encodes.
//
// Sequences that match percentEncodedPat but are missing from the lookup
// table (e.g. `%41`) are logged through logger and left in the output
// verbatim. The input slice is not modified.
func decoderPercent(logger logr.Logger, input []byte) []byte {
	var (
		encoded string
		decoded = make([]byte, 0, len(input))
		// lastIndex is the offset of the first input byte that has not yet
		// been written to decoded.
		lastIndex = 0
	)

	for _, match := range percentEncodedPat.FindAllSubmatchIndex(input, -1) {
		startIndex := match[0]
		endIndex := match[1]

		encoded = string(input[startIndex:endIndex])
		char, ok := percentEncodingToChar[encoded]
		if !ok {
			logger.Error(fmt.Errorf("unrecognized encoding"), "Unable to decode percent entity", "match", encoded)
			// Leave lastIndex untouched and write nothing here: the raw
			// sequence is emitted exactly once as part of the next copied
			// segment. Copying the prefix before this check would append
			// it again on the following match and duplicate data.
			continue
		}

		// Copy the pending part of the input up to the start of the entity,
		// then append the decoded character.
		decoded = append(decoded, input[lastIndex:startIndex]...)
		decoded = append(decoded, char...)
		lastIndex = endIndex
	}

	// Append the remaining part of the input.
	decoded = append(decoded, input[lastIndex:]...)

	return decoded
}
67 changes: 67 additions & 0 deletions pkg/decoders/percent_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package decoders

import (
"testing"

"github.com/kylelemons/godebug/pretty"

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

// TestUrlDecoder_FromChunk checks that Percent.FromChunk decodes upper- and
// lower-case percent encodings and returns nil for input that contains `%`
// characters but no percent-encoded sequence.
func TestUrlDecoder_FromChunk(t *testing.T) {
	tests := []struct {
		name  string
		chunk *sources.Chunk
		// want is the expected decoded chunk; nil means FromChunk must return nil.
		want *sources.Chunk
	}{
		// Valid
		{
			name: "uppercase",
			chunk: &sources.Chunk{
				Data: []byte("aws_session_token=FwoGZXIvYXdzED0aDNHw4GhQvSFSCn8vUCK6Af%2BKK2QGsRbN5F22xJvXyNyYoAzxTkPYrSgvvuL7%2F17tyBa5LMeHWSKV%2F9E3ON2vRSLIz0iFfeEE5cj4zmbqpw%2F5LAiDiptTvbQQKmzCE4Pt05khFcsTmwsju9ibR5Mx2oJKdHHQXCsqk0XjvugSuu%2BKbU0wigO2oSXvu1dguNg%2Bj6RTdxGAS7Uoih2WZR4ZlJCdcFNOivhf%2FkWs18mMRQ43r47GWsV9Z3vlTaMimHLWuBMldPgBcJV2iCiWrpnwBTIt2Dfkgvi8Bs7OcInotWE751K48QJnzcwPMKjsNKBE0tf1kGI9JArO8x%2BaDQJX%3D%3D"),
			},
			want: &sources.Chunk{
				Data: []byte("aws_session_token=FwoGZXIvYXdzED0aDNHw4GhQvSFSCn8vUCK6Af+KK2QGsRbN5F22xJvXyNyYoAzxTkPYrSgvvuL7/17tyBa5LMeHWSKV/9E3ON2vRSLIz0iFfeEE5cj4zmbqpw/5LAiDiptTvbQQKmzCE4Pt05khFcsTmwsju9ibR5Mx2oJKdHHQXCsqk0XjvugSuu+KbU0wigO2oSXvu1dguNg+j6RTdxGAS7Uoih2WZR4ZlJCdcFNOivhf/kWs18mMRQ43r47GWsV9Z3vlTaMimHLWuBMldPgBcJV2iCiWrpnwBTIt2Dfkgvi8Bs7OcInotWE751K48QJnzcwPMKjsNKBE0tf1kGI9JArO8x+aDQJX=="),
			},
		},
		{
			name: "lowercase",
			chunk: &sources.Chunk{
				Data: []byte("https://r2.cloudflarestorage.com/codegeex/codegeex_13b.tar.gz.0?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=b279482b3a1b5758740371cde86a9b62%2f20230112%2fus-east-1%2fs3%2faws4_request&X-Amz-Date=20230112T035544Z&X-Amz-Expires=259200&X-Amz-Signature=eaeb7b40bc57c63bbe33991620240e5bdb4bb97f51bc382b32a1a699a47a94ff&X-Amz-SignedHeaders=host\n"),
			},
			want: &sources.Chunk{
				Data: []byte("https://r2.cloudflarestorage.com/codegeex/codegeex_13b.tar.gz.0?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=b279482b3a1b5758740371cde86a9b62/20230112/us-east-1/s3/aws4_request&X-Amz-Date=20230112T035544Z&X-Amz-Expires=259200&X-Amz-Signature=eaeb7b40bc57c63bbe33991620240e5bdb4bb97f51bc382b32a1a699a47a94ff&X-Amz-SignedHeaders=host\n"),
			},
		},

		// Invalid
		{
			name: "no escaped",
			chunk: &sources.Chunk{
				Data: []byte(`-//npm.fontawesome.com/:_authToken=%YOUR_TOKEN%`),
			},
			want: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			d := &Percent{}
			got := d.FromChunk(context.Background(), tt.chunk)

			if tt.want == nil {
				if got != nil {
					t.Error("Expected nil chunk")
				}
				return
			}

			if got == nil {
				t.Fatal("got nil, did not want nil")
			}
			if diff := pretty.Compare(string(tt.want.Data), string(got.Data)); diff != "" {
				t.Errorf("Percent.FromChunk() %s diff: (-want +got)\n%s", tt.name, diff)
			}
		})
	}
}
1 change: 0 additions & 1 deletion pkg/detectors/aws/access_keys/accesskey.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ func (s scanner) Keywords() []string {
func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
logger := logContext.AddLogger(ctx).Logger().WithName("aws")
dataStr := string(data)
dataStr = aws.UrlEncodedReplacer.Replace(dataStr)

// Filter & deduplicate matches.
idMatches := make(map[string]struct{})
Expand Down
1 change: 0 additions & 1 deletion pkg/detectors/aws/session_keys/sessionkey.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ func (s scanner) Keywords() []string {
func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
logger := logContext.AddLogger(ctx).Logger().WithName("awssessionkey")
dataStr := string(data)
dataStr = aws.UrlEncodedReplacer.Replace(dataStr)

// Filter & deduplicate matches.
idMatches := make(map[string]struct{})
Expand Down
11 changes: 0 additions & 11 deletions pkg/detectors/aws/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,6 @@ var ResourceTypes = map[string]string{
"ASIA": "Temporary (AWS STS) access key IDs",
}

// UrlEncodedReplacer helps capture base64-encoded results that may be url-encoded.
// TODO: Add this as a decoder, or make it a more generic.
var UrlEncodedReplacer = strings.NewReplacer(
"%2B", "+",
"%2b", "+",
"%2F", "/",
"%2f", "/",
"%3d", "=",
"%3D", "=",
)

// Hashes, like those for git, do technically match the secret pattern.
// But they are extremely unlikely to be generated as an actual AWS secret.
// So when we find them, if they're not verified, we should ignore the result.
Expand Down
Loading

0 comments on commit 64f0518

Please sign in to comment.