Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(license): stop splitting a long license text #7336

Merged
merged 18 commits into from
Sep 5, 2024
Merged
2 changes: 1 addition & 1 deletion pkg/dependency/parser/python/packaging/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependenc
}

if license == "" && h.Get("License-File") != "" {
license = "file://" + h.Get("License-File")
license = licensing.LicenseFilePrefix + h.Get("License-File")
}

return []ftypes.Package{
Expand Down
4 changes: 2 additions & 2 deletions pkg/fanal/analyzer/language/python/packaging/packaging.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application
// Parser adds `file://` prefix to filepath from `License-File` field
// We need to read this file to find licenses
// Otherwise, this is the name of the license
if !strings.HasPrefix(lic, "file://") {
if !strings.HasPrefix(lic, licensing.LicenseFilePrefix) {
licenses = append(licenses, lic)
continue
}
licenseFilePath := path.Base(strings.TrimPrefix(lic, "file://"))
licenseFilePath := path.Base(strings.TrimPrefix(lic, licensing.LicenseFilePrefix))

findings, err := classifyLicense(app.FilePath, licenseFilePath, a.licenseClassifierConfidenceLevel, fsys)
if err != nil {
Expand Down
45 changes: 45 additions & 0 deletions pkg/licensing/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ var mapping = map[string]string{
"PUBLIC DOMAIN": Unlicense,
}

// Markers used to tag license strings so that later stages can tell what a value holds.
const (
// LicenseTextPrefix is prepended to a value that was recognized as a license text
// rather than a license name.
LicenseTextPrefix = "text://"
// LicenseFilePrefix is prepended to a value that is a path to a license file.
LicenseFilePrefix = "file://"
// CustomLicensePrefix is the leading part of the name reported for a license
// that was supplied as a license text.
CustomLicensePrefix = "CUSTOM License"
)

// pythonLicenseExceptions contains licenses that we cannot separate correctly using our logic.
// first word after separator (or/and) => license name
var pythonLicenseExceptions = map[string]string{
Expand All @@ -179,6 +185,39 @@ var pythonLicenseExceptions = map[string]string{

// licenseSplitRegexp matches the separators between license names in a combined string:
// either a lowercase "or"/"and" surrounded by spaces or underscores (optionally preceded
// by a comma), or a comma followed by optional spaces.
var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)")

// licenseTextKeywords lists fragments that typically show up in a full license
// text but not in a short license name. All entries are lowercase.
var licenseTextKeywords = []string{
	"http://",
	"https://",
	"(c)",
	"as-is",
	";",
	"hereby",
	"permission to use",
	"permission is",
	"use in source",
	"use, copy, modify",
	"using",
}

// isLicenseText reports whether str contains at least one entry of
// licenseTextKeywords. The match is case-sensitive, so callers are expected
// to pass an already lowercased string.
func isLicenseText(str string) bool {
	found := false
	for i := 0; i < len(licenseTextKeywords) && !found; i++ {
		found = strings.Contains(str, licenseTextKeywords[i])
	}
	return found
}

// TrimLicenseText returns a short, single-line preview of a license text:
// its first three words joined by single spaces, followed by "...".
//
// Words are separated by any run of whitespace (spaces, tabs, newlines), so
// multi-line texts and texts with leading or repeated spaces still produce a
// clean three-word preview. The ellipsis is always appended, even when text
// holds three words or fewer.
func TrimLicenseText(text string) string {
	const maxWords = 3

	// strings.Fields (unlike splitting on a single space) never yields empty
	// tokens and never leaves newlines or tabs inside a word.
	words := strings.Fields(text)
	if len(words) > maxWords {
		words = words[:maxWords]
	}
	return strings.Join(words, " ") + "..."
}

func Normalize(name string) string {
name = strings.TrimSpace(name)
if l, ok := mapping[strings.ToUpper(name)]; ok {
Expand All @@ -191,6 +230,12 @@ func SplitLicenses(str string) []string {
if str == "" {
return nil
}
if isLicenseText(strings.ToLower(str)) {
return []string{
LicenseTextPrefix + str,
}
}

var licenses []string
for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
lower := strings.ToLower(maybeLic)
Expand Down
7 changes: 7 additions & 0 deletions pkg/licensing/normalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ func TestSplitLicenses(t *testing.T) {
"Historical Permission Notice and Disclaimer (HPND)",
},
},
{
name: "License text",
license: "* Permission to use this software in any way is granted without",
licenses: []string{
"text://* Permission to use this software in any way is granted without",
},
},
}

for _, tt := range tests {
Expand Down
30 changes: 16 additions & 14 deletions pkg/rpc/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,13 +431,14 @@ func ConvertFromRPCDetectedLicenses(rpcLicenses []*common.DetectedLicense) []typ
for _, l := range rpcLicenses {
severity := dbTypes.Severity(l.Severity)
licenses = append(licenses, types.DetectedLicense{
Severity: severity.String(),
Category: ConvertFromRPCLicenseCategory(l.Category),
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
Confidence: float64(l.Confidence),
Link: l.Link,
Severity: severity.String(),
Category: ConvertFromRPCLicenseCategory(l.Category),
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
LicenseText: l.LicenseText,
Confidence: float64(l.Confidence),
Link: l.Link,
})
}
return licenses
Expand Down Expand Up @@ -978,13 +979,14 @@ func ConvertToRPCLicenses(licenses []types.DetectedLicense) []*common.DetectedLi
log.Warn("Severity conversion error", log.Err(err))
}
rpcLicenses = append(rpcLicenses, &common.DetectedLicense{
Severity: common.Severity(severity),
Category: ConvertToRPCLicenseCategory(l.Category),
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
Confidence: float32(l.Confidence),
Link: l.Link,
Severity: common.Severity(severity),
Category: ConvertToRPCLicenseCategory(l.Category),
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
LicenseText: l.LicenseText,
Confidence: float32(l.Confidence),
Link: l.Link,
})
}

Expand Down
60 changes: 32 additions & 28 deletions pkg/rpc/convert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -755,24 +755,26 @@ func TestConvertFromRPCLicenses(t *testing.T) {
name: "happy",
rpcLicenses: []*common.DetectedLicense{
{
Severity: common.Severity_HIGH,
Category: common.LicenseCategory_RESTRICTED,
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Confidence: 1,
Link: "https://some-link",
Severity: common.Severity_HIGH,
Category: common.LicenseCategory_RESTRICTED,
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
LicenseText: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
},
want: []types.DetectedLicense{
{
Severity: "HIGH",
Category: "restricted",
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Confidence: 1,
Link: "https://some-link",
Severity: "HIGH",
Category: "restricted",
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
LicenseText: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
},
},
Expand Down Expand Up @@ -801,24 +803,26 @@ func TestConvertToRPCLicenses(t *testing.T) {
name: "happy",
licenses: []types.DetectedLicense{
{
Severity: "HIGH",
Category: "restricted",
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Confidence: 1,
Link: "https://some-link",
Severity: "HIGH",
Category: "restricted",
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
LicenseText: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
},
want: []*common.DetectedLicense{
{
Severity: common.Severity_HIGH,
Category: common.LicenseCategory_RESTRICTED,
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Confidence: 1,
Link: "https://some-link",
Severity: common.Severity_HIGH,
Category: common.LicenseCategory_RESTRICTED,
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
LicenseText: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
},
},
Expand Down
48 changes: 29 additions & 19 deletions pkg/scanner/local/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,7 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
var osPkgLicenses []types.DetectedLicense
for _, pkg := range target.Packages {
for _, license := range pkg.Licenses {
category, severity := scanner.Scan(license)
osPkgLicenses = append(osPkgLicenses, types.DetectedLicense{
Severity: severity,
Category: category,
PkgName: pkg.Name,
Name: license,
Confidence: 1.0,
})
osPkgLicenses = append(osPkgLicenses, toDetectedLicense(scanner, license, pkg.Name, ""))
}
}
results = append(results, types.Result{
Expand All @@ -282,17 +275,11 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
var langLicenses []types.DetectedLicense
for _, lib := range app.Packages {
for _, license := range lib.Licenses {
category, severity := scanner.Scan(license)
langLicenses = append(langLicenses, types.DetectedLicense{
Severity: severity,
Category: category,
PkgName: lib.Name,
Name: license,
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
FilePath: lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath),
Confidence: 1.0,
})
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
filePath := lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath)

langLicenses = append(langLicenses, toDetectedLicense(scanner, license, lib.Name, filePath))
}
}

Expand Down Expand Up @@ -390,6 +377,29 @@ func toDetectedMisconfiguration(res ftypes.MisconfResult, defaultSeverity dbType
}
}

// toDetectedLicense builds a types.DetectedLicense for one license string of a package.
//
// When license carries licensing.LicenseTextPrefix it is treated as a license text:
// the prefix is stripped, the text is stored in LicenseText, the category and severity
// are set to unknown, and Name becomes licensing.CustomLicensePrefix followed by a
// trimmed preview of the text. Otherwise license is treated as a license name and
// the category and severity come from scanner.Scan. Confidence is always 1.0.
func toDetectedLicense(scanner licensing.Scanner, license, pkgName, filePath string) types.DetectedLicense {
	detected := types.DetectedLicense{
		PkgName:    pkgName,
		FilePath:   filePath,
		Confidence: 1.0,
	}

	if !strings.HasPrefix(license, licensing.LicenseTextPrefix) {
		// License name
		detected.Category, detected.Severity = scanner.Scan(license)
		detected.Name = license
		return detected
	}

	// License text
	text := strings.TrimPrefix(license, licensing.LicenseTextPrefix)
	detected.Category = ftypes.CategoryUnknown
	detected.Severity = dbTypes.SeverityUnknown.String()
	detected.Name = licensing.CustomLicensePrefix + ": " + licensing.TrimLicenseText(text)
	detected.LicenseText = text
	return detected
}

// ShouldScanMisconfigOrRbac reports whether scanners has the misconfiguration
// scanner or the RBAC scanner enabled.
func ShouldScanMisconfigOrRbac(scanners types.Scanners) bool {
	enabled := scanners.AnyEnabled(types.MisconfigScanner, types.RBACScanner)
	return enabled
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/types/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ type DetectedLicense struct {
// Name holds a detected license name
Name string

// LicenseText holds a long license text if Trivy detects a license name as a license text
LicenseText string
knqyf263 marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Since the struct is already named "DetectedLicense", "Text" is enough for the field name.

Suggested change
LicenseText string
Text string

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


// Confidence is level of the match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
// exact match and 0.0 indicating a complete mismatch
Confidence float64
Expand Down
Loading