Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(secret): enhance secret scanning for python binary files #7223

Merged
merged 14 commits into from
Sep 30, 2024
4 changes: 3 additions & 1 deletion docs/docs/scanner/secret.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
Trivy scans any container image, filesystem and git repository to detect exposed secrets like passwords, api keys, and tokens.
Secret scanning is enabled by default.

Trivy will scan every plaintext file, according to builtin rules or configuration. There are plenty of builtin rules:
Trivy will scan every plaintext file, according to builtin rules or configuration. Also, Trivy can detect secrets in compiled Python files (`.pyc`).

There are plenty of builtin rules:

- AWS access key
- GCP service account
Expand Down
27 changes: 21 additions & 6 deletions pkg/fanal/analyzer/secret/secret.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ var (
".gz",
".gzip",
".tar",
}

allowedBinaries = []string{
".pyc",
}
)
Expand All @@ -63,6 +66,10 @@ func init() {
analyzer.RegisterAnalyzer(NewSecretAnalyzer(secret.Scanner{}, ""))
}

func isAllowedBinary(filename string) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we omit the is prefix?

Suggested change
func isAllowedBinary(filename string) bool {
func allowedBinary(filename string) bool {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return slices.Contains(allowedBinaries, filepath.Ext(filename))
}

// SecretAnalyzer is an analyzer for secrets
type SecretAnalyzer struct {
scanner secret.Scanner
Expand Down Expand Up @@ -96,20 +103,28 @@ func (a *SecretAnalyzer) Init(opt analyzer.AnalyzerOptions) error {
func (a *SecretAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
// Do not scan binaries, except those with an allowed binary extension (e.g. `.pyc`)
binary, err := utils.IsBinary(input.Content, input.Info.Size())
if binary || err != nil {
if err != nil || (binary && !isAllowedBinary(input.FilePath)) {
return nil, nil
}

if size := input.Info.Size(); size > 10485760 { // 10MB
log.WithPrefix("secret").Warn("The size of the scanned file is too large. It is recommended to use `--skip-files` for this file to avoid high memory consumption.", log.FilePath(input.FilePath), log.Int64("size (MB)", size/1048576))
}

content, err := io.ReadAll(input.Content)
if err != nil {
return nil, xerrors.Errorf("read error %s: %w", input.FilePath, err)
}
var content []byte

content = bytes.ReplaceAll(content, []byte("\r"), []byte(""))
if !binary {
content, err = io.ReadAll(input.Content)
if err != nil {
return nil, xerrors.Errorf("read error %s: %w", input.FilePath, err)
}
content = bytes.ReplaceAll(content, []byte("\r"), []byte(""))
} else {
content, err = utils.ExtractPrintableBytes(input.Content)
if err != nil {
return nil, xerrors.Errorf("binary read error %s: %w", input.FilePath, err)
}
}

filePath := input.FilePath
// Files extracted from the image have an empty input.Dir.
Expand Down
39 changes: 39 additions & 0 deletions pkg/fanal/analyzer/secret/secret_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,30 @@ func TestSecretAnalyzer(t *testing.T) {
},
},
}
wantFindingGH_PAT := types.SecretFinding{
RuleID: "github-fine-grained-pat",
Category: "GitHub",
Title: "GitHub Fine-grained personal access tokens",
Severity: "CRITICAL",
StartLine: 1,
EndLine: 1,
Match: " o e\xabfx \xe3 @ s d Z e e d S ) \xda]*********************************************************************************************N) \xda secret1\xda print\xa9",
Code: types.Code{
Lines: []types.Line{
{
Number: 1,
Content: " o e\xabfx \xe3 @ s d Z e e d S ) \xda]*********************************************************************************************N) \xda secret1\xda print\xa9",
IsCause: true,
Annotation: "",
Truncated: false,
Highlighted: " o e\xabfx \xe3 @ s d Z e e d S ) \xda]*********************************************************************************************N) \xda secret1\xda print\xa9",
FirstCause: true,
LastCause: true,
},
},
},
}

tests := []struct {
name string
configPath string
Expand Down Expand Up @@ -153,6 +177,21 @@ func TestSecretAnalyzer(t *testing.T) {
filePath: "testdata/binaryfile",
want: nil,
},
{
name: "python binary file",
configPath: "testdata/skip-tests-config.yaml",
filePath: "testdata/secret.cpython-310.pyc",
want: &analyzer.AnalysisResult{
Secrets: []types.Secret{
{
FilePath: "/testdata/secret.cpython-310.pyc",
Findings: []types.SecretFinding{
wantFindingGH_PAT,
},
},
},
},
},
}

for _, tt := range tests {
Expand Down
Binary file not shown.
29 changes: 29 additions & 0 deletions pkg/fanal/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os"
"os/exec"
"path/filepath"
"unicode"

xio "github.com/aquasecurity/trivy/pkg/x/io"
)
Expand Down Expand Up @@ -93,3 +94,31 @@ func IsBinary(content xio.ReadSeekerAt, fileSize int64) (bool, error) {

return false, nil
}

func ExtractPrintableBytes(content xio.ReadSeekerAt) ([]byte, error) {
var printalbe []byte
current := make([]byte, 1)

wasReadable := false

for {
_, err := content.Read(current)
if err == io.EOF {
break
} else if err != nil {
return nil, err
}
if unicode.IsPrint(rune(current[0])) {
if !wasReadable {
printalbe = append(printalbe, byte(' '))
wasReadable = true
}
printalbe = append(printalbe, current[0])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This implementation concats all single printable characters, leading to the long string. Even in a binary file, there are many printable character strings when viewed on a per-byte basis. I think the strings way, setting the minimum length, is better like I shared.

} else {
wasReadable = false
}

}

return printalbe, nil
}