Skip to content

Commit

Permalink
refactor(license): use goyacc for license parser (#3824)
Browse files Browse the repository at this point in the history
  • Loading branch information
knqyf263 authored Mar 14, 2023
1 parent 00c763b commit 2bb25e7
Show file tree
Hide file tree
Showing 22 changed files with 1,488 additions and 700 deletions.
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ $(GOBIN)/labeler:
$(GOBIN)/easyjson:
go install github.com/mailru/easyjson/...@v0.7.7

$(GOBIN)/goyacc:
go install golang.org/x/tools/cmd/goyacc@latest

.PHONY: wire
wire: $(GOBIN)/wire
wire gen ./pkg/commands/... ./pkg/rpc/...
Expand Down Expand Up @@ -133,3 +136,8 @@ mkdocs-serve:
.PHONY: easyjson
easyjson: $(GOBIN)/easyjson
easyjson pkg/module/serialize/types.go

# Generate license parser with goyacc
.PHONY: yacc
yacc: $(GOBIN)/goyacc
go generate ./pkg/licensing/expression/...
16 changes: 13 additions & 3 deletions pkg/fanal/analyzer/pkg/dpkg/copyright.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens
// cf. https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#:~:text=The%20debian%2Fcopyright%20file%20must,in%20the%20Debian%20Policy%20Manual.
l := strings.TrimSpace(line[8:])

// Very rarely has below phrases
l = strings.TrimPrefix(l, "The main library is licensed under ")
l = strings.TrimSuffix(l, " license")
l = normalizeLicense(l)
if len(l) > 0 {
// Split licenses without considering "and"/"or"
// examples:
Expand Down Expand Up @@ -140,3 +138,15 @@ func (a *dpkgLicenseAnalyzer) Type() analyzer.Type {
func (a *dpkgLicenseAnalyzer) Version() int {
return dpkgLicenseAnalyzerVersion
}

// normalizeLicense heuristically cleans up a raw license string and returns
// the resulting license identifier with surrounding whitespace removed.
func normalizeLicense(s string) string {
	// Keep only the text before the first "(", e.g.
	// "The MIT License (MIT)" => "The MIT License " (trimmed at the end).
	if i := strings.Index(s, "("); i >= 0 {
		s = s[:i]
	}

	// These phrases show up very rarely; strip them when present.
	const (
		prefix = "The main library is licensed under "
		suffix = " license"
	)
	if strings.HasPrefix(s, prefix) {
		s = s[len(prefix):]
	}
	if strings.HasSuffix(s, suffix) {
		s = s[:len(s)-len(suffix)]
	}

	return strings.TrimSpace(s)
}
9 changes: 9 additions & 0 deletions pkg/licensing/category.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,15 @@ const (
FacebookExamples = "Facebook-Examples"
FreeImage = "FreeImage"
FTL = "FTL"
GFDL11WithInvariants = "GFDL-1.1-invariants"
GFDL11NoInvariants = "GFDL-1.1-no-invariants"
GFDL11 = "GFDL-1.1"
GFDL12WithInvariants = "GFDL-1.2-invariants"
GFDL12NoInvariants = "GFDL-1.2-no-invariants"
GFDL12 = "GFDL-1.2"
GFDL13WithInvariants = "GFDL-1.3-invariants"
GFDL13NoInvariants = "GFDL-1.3-no-invariants"
GFDL13 = "GFDL-1.3"
GPL10 = "GPL-1.0"
GPL20 = "GPL-2.0"
GPL20withautoconfexception = "GPL-2.0-with-autoconf-exception"
Expand Down
100 changes: 53 additions & 47 deletions pkg/licensing/expression/expression.go
Original file line number Diff line number Diff line change
@@ -1,74 +1,80 @@
package expression

import (
"fmt"
"strings"
"unicode"

"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer"
"github.com/aquasecurity/trivy/pkg/licensing/expression/parser"
"golang.org/x/xerrors"
)

type Operator string

const (
AND Operator = "AND"
OR Operator = "OR"
WITH Operator = "WITH"
var (
ErrInvalidExpression = xerrors.New("invalid expression error")
)

func (o Operator) String() string {
return fmt.Sprintf(" %s ", string(o))
type NormalizeFunc func(license string) string

// parse feeds the license string through the lexer and yyParse and returns
// the expression stored on the lexer. An error is returned when yyParse
// reports a non-zero status or when the lexer has recorded an error.
func parse(license string) (Expression, error) {
	lex := NewLexer(strings.NewReader(license))

	// A non-zero status from yyParse means the input could not be parsed.
	if status := yyParse(lex); status != 0 {
		return nil, xerrors.Errorf("license parse error: %w", lex.Err())
	}

	// Parsing succeeded, but the lexer may still hold an error of its own.
	if err := lex.Err(); err != nil {
		return nil, err
	}

	return lex.result, nil
}

func Normalize(license string, fn ...parser.NormalizeFunc) string {
lex := lexer.New(license)
licenseParser := parser.New(lex).RegisterNormalizeFunc(
fn...,
)
expression, err := licenseParser.Parse()
func Normalize(license string, fn ...NormalizeFunc) (string, error) {
expr, err := parse(license)
if err != nil {
return license
return "", xerrors.Errorf("license (%s) parse error: %w", license, err)
}
return licenseParser.Normalize(expression)
}
expr = normalize(expr, fn...)

func Join(elems []string, sep Operator) string {
var licenses []string
for i, license := range elems {
var mid Operator
if sep == AND {
mid = OR
} else if sep == OR {
mid = AND
}
return expr.String(), nil
}

if i != 0 && strings.Contains(strings.ToUpper(license), mid.String()) {
license = fmt.Sprintf("(%s)", license)
func normalize(expr Expression, fn ...NormalizeFunc) Expression {
switch e := expr.(type) {
case SimpleExpr:
for _, f := range fn {
e.license = f(e.license)
}
licenses = append(licenses, license)
return e
case CompoundExpr:
e.left = normalize(e.left, fn...)
e.right = normalize(e.right, fn...)
e.conjunction.literal = strings.ToUpper(e.conjunction.literal) // e.g. "and" => "AND"
return e
}

return strings.Join(licenses, sep.String())
return expr
}

// NormalizeForSPDX rewrites s so that it only contains characters allowed in
// an SPDX license-id, replacing every other character with '-'.
// SPDX license MUST NOT be white space between a license-id.
// ref: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions
//
// Letters and digits are matched as ASCII only: the SPDX grammar defines
// idstring over ALPHA and DIGIT, so non-ASCII letters and numerics
// (e.g. 'é', '²') are replaced as well.
func NormalizeForSPDX(s string) string {
	var b strings.Builder
	// Every rune maps to itself or to a single '-', so the output never
	// exceeds the input length in bytes.
	b.Grow(len(s))
	for _, c := range s {
		switch {
		// idstring = 1*(ALPHA / DIGIT / "-" / "." )
		case isAlphabet(c), c <= unicode.MaxASCII && unicode.IsDigit(c), c == '-', c == '.':
			b.WriteRune(c)
		case c == ':':
			// TODO: Support DocumentRef
			b.WriteRune(c)
		default:
			// Replace invalid characters with '-'
			b.WriteRune('-')
		}
	}
	return b.String()
}

// isAlphabet reports whether r is an ASCII letter (a-z or A-Z).
func isAlphabet(r rune) bool {
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
}
77 changes: 25 additions & 52 deletions pkg/licensing/expression/expression_test.go
Original file line number Diff line number Diff line change
@@ -1,83 +1,56 @@
package expression

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestNormalizeForSPDX(t *testing.T) {
func TestNormalize(t *testing.T) {
tests := []struct {
name string
license string
fn NormalizeFunc
want string
wantErr string
}{
{
name: "happy path",
name: "SPDX, space",
license: "AFL 2.0",
fn: NormalizeForSPDX,
want: "AFL-2.0",
},
{
name: "happy path with WITH section",
name: "SPDX, exception",
license: "AFL 2.0 with Linux-syscall-note exception",
fn: NormalizeForSPDX,
want: "AFL-2.0 WITH Linux-syscall-note-exception",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, NormalizeForSPDX(tt.license), "NormalizeWithExpression(%v)", tt.license)
})
}
}

func TestJoin(t *testing.T) {
tests := []struct {
name string
inputElements []string
inputOperator Operator
expect string
}{
{
name: "happy path single license",
inputElements: []string{"MIT"},
inputOperator: AND,
expect: "MIT",
name: "SPDX, invalid chars",
license: "LGPL_2.1_only or MIT OR BSD-3>Clause",
fn: NormalizeForSPDX,
want: "LGPL-2.1-only OR MIT OR BSD-3-Clause",
},
{
name: "happy path multi license",
inputElements: []string{"MIT", "GPL1.0"},
inputOperator: AND,
expect: "MIT AND GPL1.0",
},
{
name: "happy path multi license with AND operator",
inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"},
inputOperator: AND,
expect: "MIT AND GPL1.0 AND GPL2.0",
},
{
name: "happy path multi license with OR operator",
inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"},
inputOperator: OR,
expect: "MIT OR GPL1.0 OR GPL2.0",
},
{
name: "happy path multi license with OR operator, separator AND",
inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"},
inputOperator: AND,
expect: "MIT AND (GPL1.0 OR GPL2.0)",
},
{
name: "happy path multi license with AND operator, separator OR",
inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"},
inputOperator: OR,
expect: "MIT OR (GPL1.0 AND GPL2.0)",
name: "upper",
license: "LGPL-2.1-only OR MIT",
fn: strings.ToUpper,
want: "LGPL-2.1-ONLY OR MIT",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := Join(tt.inputElements, tt.inputOperator)
assert.Equal(t, tt.expect, got)
got, err := Normalize(tt.license, tt.fn)
if tt.wantErr != "" {
assert.ErrorContains(t, err, tt.wantErr)
return
}

require.NoError(t, err)
assert.Equalf(t, tt.want, got, "NormalizeWithExpression(%v)", tt.license)
})
}
}
Loading

0 comments on commit 2bb25e7

Please sign in to comment.