Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(sbom): fix incompliant license format for spdx #3335

Merged
merged 7 commits into from
Mar 12, 2023
Merged
4 changes: 4 additions & 0 deletions pkg/fanal/analyzer/pkg/dpkg/copyright.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens
// Machine-readable format
// cf. https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#:~:text=The%20debian%2Fcopyright%20file%20must,in%20the%20Debian%20Policy%20Manual.
l := strings.TrimSpace(line[8:])

// The value very rarely contains the phrases below; strip them when present.
l = strings.TrimPrefix(l, "The main library is licensed under ")
l = strings.TrimSuffix(l, " license")
if len(l) > 0 {
// Split licenses without considering "and"/"or"
// examples:
Expand Down
74 changes: 74 additions & 0 deletions pkg/licensing/expression/expression.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package expression

import (
"fmt"
"strings"

"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer"
"github.com/aquasecurity/trivy/pkg/licensing/expression/parser"
)

// Operator is a keyword that joins the operands of a license expression.
type Operator string

// Operators understood by this package.
const (
	AND  Operator = "AND"
	OR   Operator = "OR"
	WITH Operator = "WITH"
)

// String returns the operator keyword with a single space on each side, so
// the result can be used directly as a separator between two operands.
func (o Operator) String() string {
	const pad = " "
	// Convert to a plain string first so fmt does not call String again.
	return fmt.Sprint(pad, string(o), pad)
}

// Normalize parses license as a license expression and returns the
// normalized form produced by the parser, after registering the given
// normalize functions on it.
// If the expression cannot be parsed, license is returned unchanged.
func Normalize(license string, fn ...parser.NormalizeFunc) string {
	p := parser.New(lexer.New(license)).RegisterNormalizeFunc(fn...)

	parsed, err := p.Parse()
	if err != nil {
		// Best effort: hand back the input as is when it is not parsable.
		return license
	}
	return p.Normalize(parsed)
}

// Join concatenates elems into a single license expression, placing sep
// between consecutive elements.
//
// When sep is AND or OR and more than one element is joined, every element
// that itself contains the other operator (matched case-insensitively and
// surrounded by spaces) is wrapped in parentheses, regardless of its
// position, so that the grouping of each element survives in the combined
// expression. A lone element is returned as is, and no wrapping is done for
// any other separator.
func Join(elems []string, sep Operator) string {
	// mid is the operator whose presence inside an element would make the
	// joined expression ambiguous; it stays empty for other separators.
	var mid Operator
	switch sep {
	case AND:
		mid = OR
	case OR:
		mid = AND
	}
	wrap := mid != "" && len(elems) > 1

	licenses := make([]string, 0, len(elems))
	for _, license := range elems {
		if wrap && strings.Contains(strings.ToUpper(license), mid.String()) {
			license = fmt.Sprintf("(%s)", license)
		}
		licenses = append(licenses, license)
	}

	return strings.Join(licenses, sep.String())
}

// NormalizeForSPDX rewrites a license name so that it contains no white
// space inside a license-id, replacing every ' ' with '-'.
// SPDX license-ids MUST NOT contain white space, while there MUST be white
// space on either side of the operator "WITH". The first " with " found in
// name (matched case-insensitively) is therefore emitted as " WITH ", and the
// text on each side of it is normalized independently.
// A name that starts with the operator is returned unchanged.
// ref: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions
//
// examples:
//
//	AFL 2.0                                  => AFL-2.0
//	GPL-2+ with distribution exception       => GPL-2+ WITH distribution-exception
//	GPL-2 with Linux-syscall-note exception  => GPL-2 WITH Linux-syscall-note-exception
//	AFL 2.0 with Linux-syscall-note exception => AFL-2.0 WITH Linux-syscall-note-exception
func NormalizeForSPDX(name string) string {
	with := WITH.String()

	// Search the original string rather than an upper-cased copy: changing
	// case can change the byte length of non-ASCII text, which would make the
	// resulting index unusable for slicing name.
	i := indexFoldASCII(name, with)
	if i < 0 {
		return strings.ReplaceAll(name, " ", "-")
	}
	if i == 0 {
		// There is no license-id in front of the operator; leave the input
		// untouched.
		return name
	}

	licenseID := strings.ReplaceAll(name[:i], " ", "-")
	exception := strings.ReplaceAll(name[i+len(with):], " ", "-")
	return licenseID + with + exception
}

// indexFoldASCII returns the byte offset of the first occurrence of the ASCII
// string substr in s, ignoring letter case, or -1 if there is none.
func indexFoldASCII(s, substr string) int {
	for i := 0; i+len(substr) <= len(s); i++ {
		if strings.EqualFold(s[i:i+len(substr)], substr) {
			return i
		}
	}
	return -1
}
83 changes: 83 additions & 0 deletions pkg/licensing/expression/expression_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package expression

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestNormalizeForSPDX checks that white space inside license names is
// replaced with '-' and that a "with" clause is emitted as " WITH ".
func TestNormalizeForSPDX(t *testing.T) {
	tests := []struct {
		name    string
		license string
		want    string
	}{
		{
			name:    "happy path",
			license: "AFL 2.0",
			want:    "AFL-2.0",
		},
		{
			name:    "happy path with WITH section",
			license: "AFL 2.0 with Linux-syscall-note exception",
			want:    "AFL-2.0 WITH Linux-syscall-note-exception",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The failure message names the function that is actually under test.
			assert.Equalf(t, tt.want, NormalizeForSPDX(tt.license), "NormalizeForSPDX(%v)", tt.license)
		})
	}
}

// TestJoin checks how Join combines license elements with AND / OR and when
// it wraps an element in parentheses.
func TestJoin(t *testing.T) {
	type testCase struct {
		name          string
		inputElements []string
		inputOperator Operator
		expect        string
	}

	cases := []testCase{
		{
			name:          "happy path single license",
			inputElements: []string{"MIT"},
			inputOperator: AND,
			expect:        "MIT",
		},
		{
			name:          "happy path multi license",
			inputElements: []string{"MIT", "GPL1.0"},
			inputOperator: AND,
			expect:        "MIT AND GPL1.0",
		},
		{
			name:          "happy path multi license with AND operator",
			inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"},
			inputOperator: AND,
			expect:        "MIT AND GPL1.0 AND GPL2.0",
		},
		{
			name:          "happy path multi license with OR operator",
			inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"},
			inputOperator: OR,
			expect:        "MIT OR GPL1.0 OR GPL2.0",
		},
		{
			name:          "happy path multi license with OR operator, separator AND",
			inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"},
			inputOperator: AND,
			expect:        "MIT AND (GPL1.0 OR GPL2.0)",
		},
		{
			name:          "happy path multi license with AND operator, separator OR",
			inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"},
			inputOperator: OR,
			expect:        "MIT OR (GPL1.0 AND GPL2.0)",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.expect, Join(tc.inputElements, tc.inputOperator))
		})
	}
}
85 changes: 85 additions & 0 deletions pkg/licensing/expression/lexer/lexer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package lexer

import (
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
)

// Lexer walks over a license expression one byte at a time.
type Lexer struct {
	// input is the text being scanned.
	input string
	// position is the index of the byte held in ch; readPosition is the
	// index of the byte that will be read next.
	position, readPosition int
	// ch is the current byte, or 0 once the end of input has been reached.
	ch byte
}

// New returns a Lexer for input with its first character already read.
func New(input string) *Lexer {
	lex := new(Lexer)
	lex.input = input
	// Prime the lexer so ch, position and readPosition are set up.
	lex.readChar()
	return lex
}

// NextToken skips leading white space and returns the next token of the
// input: an identifier or keyword for a run of identifier characters, a
// parenthesis token, EOF at the end of input, or ILLEGAL for any other byte.
func (l *Lexer) NextToken() token.Token {
	l.skipWhitespace()

	// Identifiers and keywords: readIdentifier has already moved past the
	// last character, so no extra readChar is needed here.
	if isLetter(l.ch) {
		literal := l.readIdentifier()
		return token.Token{Type: token.LookupIdent(literal), Literal: literal}
	}

	// Everything else is a single-byte token.
	var kind token.TokenType
	switch l.ch {
	case 0:
		kind = token.EOF
	case '(':
		kind = token.LPAREN
	case ')':
		kind = token.RPAREN
	default:
		kind = token.ILLEGAL
	}

	tok := newToken(kind, l.ch)
	l.readChar()
	return tok
}

// isLetter reports whether ch may be part of an identifier: an ASCII letter
// or digit, or one of '_', '+', '.', '-', '/', ':' and '='.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z',
		ch >= 'A' && ch <= 'Z',
		ch >= '0' && ch <= '9':
		return true
	}

	switch ch {
	case '_', '+', '.', '-', '/', ':', '=':
		return true
	}
	return false
}

// readIdentifier consumes identifier characters starting at the current
// position and returns them; the lexer is left on the first character that
// is not part of the identifier.
func (l *Lexer) readIdentifier() string {
	start := l.position
	for {
		if !isLetter(l.ch) {
			break
		}
		l.readChar()
	}
	end := l.position
	return l.input[start:end]
}

// newToken builds a token of the given type whose literal is the string
// conversion of the single byte ch.
func newToken(tokenType token.TokenType, ch byte) token.Token {
	var tok token.Token
	tok.Type = tokenType
	tok.Literal = string(ch)
	return tok
}

// skipWhitespace advances the lexer past any run of spaces, tabs, line feeds
// and carriage returns.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}

// readChar loads the byte at readPosition into ch, or 0 when the input is
// exhausted, then moves position and readPosition forward by one.
func (l *Lexer) readChar() {
	// 0 is ASCII NUL and marks the end of input.
	l.ch = 0
	if l.readPosition < len(l.input) {
		l.ch = l.input[l.readPosition]
	}
	l.position, l.readPosition = l.readPosition, l.readPosition+1
}
143 changes: 143 additions & 0 deletions pkg/licensing/expression/lexer/lexer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package lexer

import (
"testing"

"github.com/aquasecurity/trivy/pkg/licensing/expression/token"

"github.com/stretchr/testify/assert"
)

// TestNextToken feeds license expressions to the lexer and compares the
// produced tokens, ignoring ILLEGAL ones, with the expected sequence.
func TestNextToken(t *testing.T) {
	tests := []struct {
		name              string
		licenseExpression string
		expectTokens      []token.Token
	}{
		{
			name:              "empty input",
			licenseExpression: "",
			expectTokens: []token.Token{
				{
					Type:    token.EOF,
					Literal: string(byte(0)),
				},
			},
		},
		{
			name:              "single ident",
			licenseExpression: "GPL1.0+",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
			},
		},
		{
			name:              "multi ident",
			licenseExpression: "Public Domain",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "Public",
				},
				{
					Type:    token.IDENT,
					Literal: "Domain",
				},
			},
		},
		{
			name:              "AND OR operator",
			licenseExpression: "Public Domain AND GPL1.0+ OR GPL2.0_or_later",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "Public",
				},
				{
					Type:    token.IDENT,
					Literal: "Domain",
				},
				{
					Type:    token.AND,
					Literal: "AND",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
				{
					Type:    token.OR,
					Literal: "OR",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL2.0_or_later",
				},
			},
		},
		{
			name:              "PAREN operator",
			licenseExpression: "(GPL1.0+ OR GPL2.0)",
			expectTokens: []token.Token{
				{
					Type:    token.LPAREN,
					Literal: "(",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
				{
					Type:    token.OR,
					Literal: "OR",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL2.0",
				},
				{
					Type:    token.RPAREN,
					Literal: ")",
				},
			},
		},
		{
			name:              "illegal string",
			licenseExpression: "GPL1.0+" + string(byte(0x20)) + "あ" + "🇯🇵" + "AND LGPL1.0",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
				{
					Type:    token.AND,
					Literal: "AND",
				},
				{
					Type:    token.IDENT,
					Literal: "LGPL1.0",
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			l := New(tt.licenseExpression)
			for _, expect := range tt.expectTokens {
				tok := l.NextToken()

				// Skip ILLEGAL tokens without using up an expectation;
				// otherwise the tokens that follow them would never be
				// compared. The loop ends at the latest on the EOF token.
				for tok.Type == token.ILLEGAL {
					tok = l.NextToken()
				}

				assert.Equal(t, expect.Type, tok.Type)
				assert.Equal(t, expect.Literal, tok.Literal)
			}
		})
	}
}
Loading