From 268734be35c12644795938d96073b19ecc2a019e Mon Sep 17 00:00:00 2001 From: Rob Percival Date: Wed, 12 Jun 2019 16:55:07 +0100 Subject: [PATCH] Modify licenses tool to use "golang.org/x/tools/go/packages" Using this package, instead of "go/build", enables the tool to work with packages that are using Go Modules. It also transparently supports $GOFLAGS and handles walking the package dependency tree, which makes it possible to delete some code. * Change Library.Packages to []string No longer exposes implementation detail of package used for walking dependency graph. This makes it easier to make changes. * Terminate findUpwards() if the root directory is reached With Go Modules enabled, packages can be checked out to locations outside of $GOPATH/src. This means we can no longer assume that this will be a parent directory (which is where findUpwards() would usually terminate). * Log warning if package contains non-Go code No longer treating this as a terminal error. However, user should still investigate these dependencies to confirm no further dependencies exist. --- README.md | 56 ++++++++++++++ csv.go | 3 +- licenses/find.go | 18 +++-- licenses/find_test.go | 14 +--- licenses/library.go | 162 +++++++++++++++++++++------------------ licenses/library_test.go | 28 +++---- main.go | 48 ------------ save.go | 3 +- 8 files changed, 174 insertions(+), 158 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..f064709 --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +# Licenses tool + +This tool analyzes the dependency tree of a Go package/binary. It can output a report on the libraries used and under what license they can be used. It can also collect all of the license documents, copyright notices and source code into a directory in order to comply with license terms on redistribution. 
+ +## Reports + +```shell +$ licenses csv "github.com/google/trillian/server/trillian_log_server" +google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/master/LICENSE,Apache-2.0 +go.opencensus.io,https://github.com/census-instrumentation/opencensus-go/blob/master/LICENSE,Apache-2.0 +github.com/google/certificate-transparency-go,https://github.com/google/certificate-transparency-go/blob/master/LICENSE,Apache-2.0 +github.com/jmespath/go-jmespath,https://github.com/aws/aws-sdk-go/blob/master/vendor/github.com/jmespath/go-jmespath/LICENSE,Apache-2.0 +golang.org/x/text,https://go.googlesource.com/text/+/refs/heads/master/LICENSE,BSD-3-Clause +golang.org/x/sync/semaphore,https://go.googlesource.com/sync/+/refs/heads/master/LICENSE,BSD-3-Clause +github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/master/LICENSE,Apache-2.0 +github.com/beorn7/perks/quantile,https://github.com/beorn7/perks/blob/master/LICENSE,MIT +``` + +This command prints out a comma-separated report (CSV) listing the libraries used by a binary/package, the URL where their licenses can be viewed and the type of license. A library is considered to be one or more Go packages that share a license file. + +URLs will not be available if the library is not checked out as a Git repository (e.g. as is the case when Go Modules are enabled). + +## Complying with license terms + +```shell +$ licenses save "github.com/google/trillian/server/trillian_log_server" --save_dir="/tmp/trillian_log_server" +``` + +This command analyzes a binary/package's dependencies and determines what needs to be redistributed alongside that binary/package in order to comply with the license terms. This typically includes the license itself and a copyright notice, but may also include the dependency's source code. All of the required artifacts will be saved in the directory indicated by `--save_dir`. + +## Warnings and errors + +The tool will log warnings and errors in some scenarios. 
This section provides guidance on addressing them. + +### Dependency contains non-Go code + +A warning will be logged when a dependency contains non-Go code. This is because it is not possible to check the non-Go code for further dependencies, which may conceal additional license requirements. You should investigate this code to determine whether it has dependencies and take action to comply with their license terms. + +### Error discovering URL + +In order to determine the URL where a license file can be viewed, this tool performs the following steps: + +1) Locates the license file on disk. +2) Assuming that it is in a Git repository, inspects the repository's config to find the URL of the remote "origin" repository. +3) Adds the license file path to this URL. + +For this to work, the remote repository named "origin" must have an HTTPS URL. You can check this by running the following commands, +inserting the path mentioned in the log message: + +```shell +$ cd "path/mentioned/in/log/message" +$ git remote get-url origin +https://github.com/google/trillian.git +``` + +If you want the tool to use a different remote repository, use the `--git_remote` flag. You can pass this flag repeatedly to make the tool try a number of different remotes. \ No newline at end of file diff --git a/csv.go b/csv.go index 4e03649..650347c 100644 --- a/csv.go +++ b/csv.go @@ -15,6 +15,7 @@ package main import ( + "context" "encoding/csv" "os" "strings" @@ -50,7 +51,7 @@ func csvMain(_ *cobra.Command, args []string) error { return err } - libs, err := libraries(importPath) + libs, err := licenses.Libraries(context.Background(), importPath) if err != nil { return err } diff --git a/licenses/find.go b/licenses/find.go index 5b68dc7..51062cb 100644 --- a/licenses/find.go +++ b/licenses/find.go @@ -35,28 +35,34 @@ var ( ) // Find returns the file path of the license for this package. 
-func Find(pkg *build.Package) (string, error) { +func Find(dir string) (string, error) { var stopAt []*regexp.Regexp stopAt = append(stopAt, srcDirRegexps...) stopAt = append(stopAt, vendorRegexp) - return findUpwards(pkg.Dir, licenseRegexp, stopAt) + return findUpwards(dir, licenseRegexp, stopAt) } func findUpwards(dir string, r *regexp.Regexp, stopAt []*regexp.Regexp) (string, error) { start := dir + // Stop once dir matches a stopAt regexp or dir is the filesystem root for !matchAny(stopAt, dir) { - files, err := ioutil.ReadDir(dir) + dirContents, err := ioutil.ReadDir(dir) if err != nil { return "", err } - for _, f := range files { + for _, f := range dirContents { if r.MatchString(f.Name()) { return filepath.Join(dir, f.Name()), nil } } - dir = filepath.Dir(dir) + parent := filepath.Dir(dir) + if parent == dir { + // Can't go any higher up the directory tree. + break + } + dir = parent } - return "", fmt.Errorf("no file matching %q found for %s", r, start) + return "", fmt.Errorf("no file/directory matching regexp %q found for %s", r, start) } func matchAny(patterns []*regexp.Regexp, s string) bool { diff --git a/licenses/find_test.go b/licenses/find_test.go index 9995345..d2ab2b0 100644 --- a/licenses/find_test.go +++ b/licenses/find_test.go @@ -23,25 +23,19 @@ import ( func TestFind(t *testing.T) { for _, test := range []struct { desc string - importPath string - workingDir string - importMode build.ImportMode + dir string wantLicensePath string }{ { desc: "Trillian license", - importPath: "github.com/google/trillian/scripts/licenses/licenses", + dir: filepath.Join(build.Default.GOPATH, "src/github.com/google/trillian/scripts/licenses/licenses"), wantLicensePath: filepath.Join(build.Default.GOPATH, "src/github.com/google/trillian/LICENSE"), }, } { t.Run(test.desc, func(t *testing.T) { - pkg, err := build.Import(test.importPath, test.workingDir, test.importMode) - if err != nil { - t.Fatalf("build.Import(%q, %q, %v) = (_, %q), want (_, nil)", 
test.importPath, test.workingDir, test.importMode, err) - } - licensePath, err := Find(pkg) + licensePath, err := Find(test.dir) if err != nil || licensePath != test.wantLicensePath { - t.Fatalf("Find(%v) = (%#v, %q), want (%q, nil)", pkg, licensePath, err, test.wantLicensePath) + t.Fatalf("Find(%v) = (%#v, %q), want (%q, nil)", test.dir, licensePath, err, test.wantLicensePath) } }) } diff --git a/licenses/library.go b/licenses/library.go index 1060e09..0146a0f 100644 --- a/licenses/library.go +++ b/licenses/library.go @@ -15,78 +15,134 @@ package licenses import ( + "context" "fmt" "go/build" + "path/filepath" "sort" - "sync" + "strings" "github.com/golang/glog" -) - -var ( - pkgCache sync.Map + "golang.org/x/tools/go/packages" ) // Library is a collection of packages covered by the same license file. type Library struct { - Packages []*build.Package + // LicensePath is the path of the file containing the library's license. LicensePath string + // Packages contains import paths for Go packages in this library. + // It may not be the complete set of all packages in the library. + Packages []string +} + +// PackagesError aggregates all Packages[].Errors into a single error. +type PackagesError struct { + pkgs []*packages.Package +} + +func (e PackagesError) Error() string { + var str strings.Builder + str.WriteString(fmt.Sprintf("errors for %q:", e.pkgs)) + packages.Visit(e.pkgs, nil, func(pkg *packages.Package) { + for _, err := range pkg.Errors { + str.WriteString(fmt.Sprintf("\n%s: %s", pkg.PkgPath, err)) + } + }) + return str.String() } // Libraries returns the collection of libraries used by this package, directly or transitively. // A library is a collection of one or more packages covered by the same license file. // Packages not covered by a license will be returned as individual libraries. // Standard library packages will be ignored. 
-func Libraries(ctx *build.Context, pkg *build.Package) ([]*Library, error) { - pkgs := map[string]*build.Package{pkg.ImportPath: pkg} - if err := dependencies(ctx, pkg, pkgs); err != nil { +func Libraries(ctx context.Context, importPaths ...string) ([]*Library, error) { + cfg := &packages.Config{ + Context: ctx, + Mode: packages.NeedImports | packages.NeedDeps | packages.NeedFiles | packages.NeedName, + } + + rootPkgs, err := packages.Load(cfg, importPaths...) + if err != nil { return nil, err } - pkgsByLicense := make(map[string][]*build.Package) - for _, p := range pkgs { + + pkgs := map[string]*packages.Package{} + pkgsByLicense := make(map[string][]*packages.Package) + errorOccurred := false + packages.Visit(rootPkgs, func(p *packages.Package) bool { + if len(p.Errors) > 0 { + errorOccurred = true + return false + } if isStdLib(p) { // No license requirements for the Go standard library. - continue + return false + } + if len(p.OtherFiles) > 0 { + glog.Warningf("%q contains non-Go code that can't be inspected for further dependencies:\n%s", p.PkgPath, strings.Join(p.OtherFiles, "\n")) + } + var pkgDir string + switch { + case len(p.GoFiles) > 0: + pkgDir = filepath.Dir(p.GoFiles[0]) + case len(p.CompiledGoFiles) > 0: + pkgDir = filepath.Dir(p.CompiledGoFiles[0]) + case len(p.OtherFiles) > 0: + pkgDir = filepath.Dir(p.OtherFiles[0]) + default: + // This package is empty - nothing to do. 
+ return true } - licensePath, err := Find(p) + licensePath, err := Find(pkgDir) if err != nil { - glog.Errorf("Failed to find license for %s: %v", p.ImportPath, err) + glog.Errorf("Failed to find license for %s: %v", p.PkgPath, err) } + pkgs[p.PkgPath] = p pkgsByLicense[licensePath] = append(pkgsByLicense[licensePath], p) + return true + }, nil) + if errorOccurred { + return nil, PackagesError{ + pkgs: rootPkgs, + } } + var libraries []*Library for licensePath, pkgs := range pkgsByLicense { if licensePath == "" { // No license for these packages - return each one as a separate library. for _, p := range pkgs { libraries = append(libraries, &Library{ - Packages: []*build.Package{p}, + Packages: []string{p.PkgPath}, }) } continue } - libraries = append(libraries, &Library{ + lib := &Library{ LicensePath: licensePath, - Packages: pkgs, - }) + } + for _, pkg := range pkgs { + lib.Packages = append(lib.Packages, pkg.PkgPath) + } + libraries = append(libraries, lib) } return libraries, nil } // Name is the common prefix of the import paths for all of the packages in this library. func (l *Library) Name() string { - if len(l.Packages) == 0 { + return commonAncestor(l.Packages) +} + +func commonAncestor(paths []string) string { + if len(paths) == 0 { return "" } - if len(l.Packages) == 1 { - return l.Packages[0].ImportPath - } - var importPaths []string - for _, pkg := range l.Packages { - importPaths = append(importPaths, pkg.ImportPath) + if len(paths) == 1 { + return paths[0] } - sort.Strings(importPaths) - min, max := importPaths[0], importPaths[len(importPaths)-1] + sort.Strings(paths) + min, max := paths[0], paths[len(paths)-1] lastSlashIndex := 0 for i := 0; i < len(min) && i < len(max); i++ { if min[i] != max[i] { @@ -103,54 +159,10 @@ func (l *Library) String() string { return l.Name() } -// importPackage returns information about the package identified by the given import path. 
-// If there is a "vendor" directory in workingDir, packages in that directory will take precedence -// over packages with the same import path found elsewhere. -func importPackage(ctx *build.Context, importPath string, workingDir string) (*build.Package, error) { - cacheKey := workingDir + ":" + importPath - if pkg, ok := pkgCache.Load(cacheKey); ok { - return pkg.(*build.Package), nil - } - - pkg, err := ctx.Import(importPath, workingDir, 0) - if err != nil { - return nil, err - } - - pkgCache.Store(cacheKey, pkg) - return pkg, nil -} - // isStdLib returns true if this package is part of the Go standard library. -func isStdLib(pkg *build.Package) bool { - return pkg.Root == build.Default.GOROOT -} - -// dependencies finds the Go packages used by this package, directly or transitively. -// They are added to the provided deps map. -func dependencies(ctx *build.Context, pkg *build.Package, deps map[string]*build.Package) error { - for _, imp := range pkg.Imports { - if imp == "C" { - return fmt.Errorf("%s has a dependency on C code, which cannot be inspected for further dependencies", pkg.ImportPath) - } - if _, ok := deps[imp]; ok { - // Already have this dependency in deps (and therefore all of its dependencies too) - continue - } - impPkg, err := importPackage(ctx, imp, pkg.Dir) - if err != nil { - return fmt.Errorf("%s -> %v", pkg.ImportPath, err) - } - deps[imp] = impPkg - if isStdLib(impPkg) { - // Don't delve into standard library dependencies - that'll just lead to dependencies on other parts of the standard library, - // which isn't of interest (no license requirements for the standard library). 
- continue - } - // Collect transitive dependencies - if err := dependencies(ctx, impPkg, deps); err != nil { - return fmt.Errorf("%s -> %v", pkg.ImportPath, err) - } +func isStdLib(pkg *packages.Package) bool { + if len(pkg.GoFiles) == 0 { + return false } - return nil + return strings.HasPrefix(pkg.GoFiles[0], build.Default.GOROOT) } diff --git a/licenses/library_test.go b/licenses/library_test.go index cd24376..3137fa1 100644 --- a/licenses/library_test.go +++ b/licenses/library_test.go @@ -15,7 +15,7 @@ package licenses import ( - "go/build" + "context" "testing" "github.com/google/go-cmp/cmp" @@ -26,8 +26,6 @@ func TestLibraries(t *testing.T) { for _, test := range []struct { desc string importPath string - workingDir string - importMode build.ImportMode wantLibs []string }{ { @@ -49,20 +47,16 @@ func TestLibraries(t *testing.T) { }, } { t.Run(test.desc, func(t *testing.T) { - pkg, err := build.Import(test.importPath, test.workingDir, test.importMode) + gotLibs, err := Libraries(context.Background(), test.importPath) if err != nil { - t.Fatalf("build.Import(%q, %q, %v) = (_, %q), want (_, nil)", test.importPath, test.workingDir, test.importMode, err) - } - gotLibs, err := Libraries(&build.Default, pkg) - if err != nil { - t.Fatalf("Libraries(_, %v) = (_, %q), want (_, nil)", pkg, err) + t.Fatalf("Libraries(_, %q) = (_, %q), want (_, nil)", test.importPath, err) } var gotLibNames []string for _, lib := range gotLibs { gotLibNames = append(gotLibNames, lib.Name()) } if diff := cmp.Diff(test.wantLibs, gotLibNames, cmpopts.SortSlices(func(x, y string) bool { return x < y })); diff != "" { - t.Errorf("Libraries(_, %v): diff (-want +got)\n%s", pkg, diff) + t.Errorf("Libraries(_, %q): diff (-want +got)\n%s", test.importPath, diff) } }) } @@ -82,8 +76,8 @@ func TestLibraryName(t *testing.T) { { desc: "Library with 1 package", lib: &Library{ - Packages: []*build.Package{ - {ImportPath: "github.com/google/trillian/crypto"}, + Packages: []string{ + 
"github.com/google/trillian/crypto", }, }, wantName: "github.com/google/trillian/crypto", @@ -91,9 +85,9 @@ func TestLibraryName(t *testing.T) { { desc: "Library with 2 packages", lib: &Library{ - Packages: []*build.Package{ - {ImportPath: "github.com/google/trillian/crypto"}, - {ImportPath: "github.com/google/trillian/server"}, + Packages: []string{ + "github.com/google/trillian/crypto", + "github.com/google/trillian/server", }, }, wantName: "github.com/google/trillian", @@ -101,8 +95,8 @@ func TestLibraryName(t *testing.T) { { desc: "Vendored library", lib: &Library{ - Packages: []*build.Package{ - {ImportPath: "github.com/google/trillian/vendor/coreos/etcd"}, + Packages: []string{ + "github.com/google/trillian/vendor/coreos/etcd", }, }, wantName: "github.com/google/trillian/vendor/coreos/etcd", diff --git a/main.go b/main.go index a6a353e..c1b9d30 100644 --- a/main.go +++ b/main.go @@ -16,17 +16,10 @@ package main import ( "flag" - "fmt" - "go/build" - "os" "strings" - "github.com/google/trillian/scripts/licenses/licenses" - - "bitbucket.org/creachadair/shell" "github.com/golang/glog" "github.com/spf13/cobra" - "github.com/spf13/pflag" ) var ( @@ -36,62 +29,21 @@ var ( // Flags shared between subcommands confidenceThreshold float64 - buildTags string ) func init() { rootCmd.PersistentFlags().Float64Var(&confidenceThreshold, "confidence_threshold", 0.9, "Minimum confidence required in order to positively identify a license.") - // Go build flags, which should match flags offered by `go build` - rootCmd.PersistentFlags().StringVar(&buildTags, "tags", "", "A space-separated list of build tags to consider satisfied.") } func main() { flag.Parse() rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine) - if err := parseGoBuildFlags(rootCmd.PersistentFlags()); err != nil { - glog.Error(err) - } if err := rootCmd.Execute(); err != nil { glog.Exit(err) } } -// libraries returns the libraries used by the package identified by importPath. 
-// The import path is assumed to be in the context of the current working -// directory, so vendoring and relative import paths will work. -func libraries(importPath string) ([]*licenses.Library, error) { - // Import the main package and find all of the libraries that it uses. - wd, err := os.Getwd() - if err != nil { - return nil, err - } - pkg, err := build.Import(importPath, wd, build.ImportMode(0)) - if err != nil { - return nil, err - } - buildCtx := build.Default - buildCtx.BuildTags = strings.Split(buildTags, " ") - return licenses.Libraries(&buildCtx, pkg) -} - -// parseGoBuildFlags will parse the $GOFLAGS environment variable for recognised -// flags and adopt their values. -func parseGoBuildFlags(flagset *pflag.FlagSet) error { - // Temporarily ensure that unknown flags are not treated as an error, because - // this binary doesn't support most `go build` flags. - defer func(oldValue bool) { - flagset.ParseErrorsWhitelist.UnknownFlags = oldValue - }(flagset.ParseErrorsWhitelist.UnknownFlags) - rootCmd.PersistentFlags().ParseErrorsWhitelist.UnknownFlags = true - - goFlags, ok := shell.Split(os.Getenv("GOFLAGS")) - if !ok { - return fmt.Errorf("$GOFLAGS is invalid: unclosed quotation") - } - return flagset.Parse(goFlags) -} - // Unvendor removes the "*/vendor/" prefix from the given import path, if present. func unvendor(importPath string) string { if vendorerAndVendoree := strings.SplitN(importPath, "/vendor/", 2); len(vendorerAndVendoree) == 2 { diff --git a/save.go b/save.go index 20e571f..a12e4ae 100644 --- a/save.go +++ b/save.go @@ -15,6 +15,7 @@ package main import ( + "context" "fmt" "io/ioutil" "os" @@ -81,7 +82,7 @@ func saveMain(_ *cobra.Command, args []string) error { return err } - libs, err := libraries(importPath) + libs, err := licenses.Libraries(context.Background(), importPath) if err != nil { return err }