From 6e34f8b3d0c503c5d34dc8b79c0faadb14bbfbc6 Mon Sep 17 00:00:00 2001 From: DmitriyLewen Date: Thu, 25 Jul 2024 16:38:47 +0500 Subject: [PATCH] refactor: move detection of dev/direct deps to dep parser --- pkg/dependency/parser/nodejs/yarn/parse.go | 68 ++++++-- .../analyzer/language/nodejs/yarn/yarn.go | 149 +++--------------- 2 files changed, 75 insertions(+), 142 deletions(-) diff --git a/pkg/dependency/parser/nodejs/yarn/parse.go b/pkg/dependency/parser/nodejs/yarn/parse.go index 746bee991446..351226ab212a 100644 --- a/pkg/dependency/parser/nodejs/yarn/parse.go +++ b/pkg/dependency/parser/nodejs/yarn/parse.go @@ -5,6 +5,8 @@ import ( "bytes" "io" "regexp" + "slices" + "sort" "strings" "github.com/samber/lo" @@ -127,7 +129,7 @@ func ignoreProtocol(protocol string) bool { return false } -func parseResults(patternIDs map[string]string, dependsOn map[string][]string) (deps []ftypes.Dependency) { +func parseResults(patternIDs map[string]string, dependsOn map[string][]string) (deps ftypes.Dependencies) { // find dependencies by patterns for pkgID, depPatterns := range dependsOn { depIDs := lo.Map(depPatterns, func(pattern string, index int) string { @@ -269,9 +271,15 @@ func parseDependency(line string) (string, error) { } } -func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependency, error) { +func (p *Parser) Parse(r xio.ReadSeekerAt, pkgJsonDirect, pkgJsonDirectDev map[string]string) ([]ftypes.Package, []ftypes.Dependency, error) { lineNumber := 1 - var pkgs []ftypes.Package + var pkgs = make(map[string]ftypes.Package) + var directPkgs, directDevPkgs []string + + // Package.json file of project contains direct/direct Dev deps in key-value format (`name`->`version constraint` e.g. `"js-tokens": "^2.0.0"`) + // We need to get pattern to match packageID and pattern when parsing packages + directPatterns := lo.MapToSlice(pkgJsonDirect, func(name string, ver string) string { return packageID(name, ver) }) + directDevPatterns := lo.MapToSlice(pkgJsonDirectDev, func(name string, ver string) string { return packageID(name, ver) }) // patternIDs holds mapping between patterns and library IDs // e.g. ajv@^6.5.5 => ajv@6.10.0 @@ -291,12 +299,19 @@ func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependenc } pkgID := packageID(lib.Name, lib.Version) + pkg := ftypes.Package{ + ID: pkgID, + Name: lib.Name, + Version: lib.Version, + Locations: []ftypes.Location{lib.Location}, + } for _, pattern := range lib.Patterns { - // Use `@latest` ID for packages with `pattern` that uses `latest` version. - // This is necessary to find direct dependencies when matching against the associated `package.json` file. - // pkg.ID will be updated to Trivy ID format (`@`) later after checking `package.json` file. - if _, ver, _ := strings.Cut(pattern, "@"); ver == "latest" { - pkgID = pattern + // Update `Relationship` and `Dev` fields for Direct pkgs + if slices.Contains(directDevPatterns, pattern) { + directDevPkgs = append(directDevPkgs, pkgID) + } + if slices.Contains(directPatterns, pattern) { + directPkgs = append(directPkgs, pkgID) } // e.g. // combined-stream@^1.0.6 => combined-stream@1.0.8 @@ -307,12 +322,7 @@ func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependenc } } - pkgs = append(pkgs, ftypes.Package{ - ID: pkgID, - Name: lib.Name, - Version: lib.Version, - Locations: []ftypes.Location{lib.Location}, - }) + pkgs[pkgID] = pkg } if err := scanner.Err(); err != nil { @@ -322,7 +332,35 @@ func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependenc // Replace dependency patterns with library IDs // e.g. ajv@^6.5.5 => ajv@6.10.0 deps := parseResults(patternIDs, dependsOn) - return pkgs, deps, nil + + // Walk to dependsOn and update `relationship` and `Dev` fields + depsMap := lo.SliceToMap(deps, func(dep ftypes.Dependency) (string, []string) { return dep.ID, dep.DependsOn }) + for _, pkgID := range directDevPkgs { + walkDependencies(pkgs, pkgID, depsMap, ftypes.RelationshipDirect, true) + } + for _, pkgID := range directPkgs { + walkDependencies(pkgs, pkgID, depsMap, ftypes.RelationshipDirect, false) + } + + pkgSlice := lo.Values(pkgs) + sort.Sort(ftypes.Packages(pkgSlice)) + sort.Sort(deps) + + return pkgSlice, deps, nil +} + +func walkDependencies(pkgs map[string]ftypes.Package, pkgID string, deps map[string][]string, relationship ftypes.Relationship, dev bool) { + pkg := pkgs[pkgID] + // Update pkg fields + pkg.Relationship = relationship + pkg.Indirect = lo.Ternary(relationship == ftypes.RelationshipDirect, false, true) + pkg.Dev = dev + pkgs[pkgID] = pkg + + // Update child dependencies + for _, depID := range deps[pkgID] { + walkDependencies(pkgs, depID, deps, ftypes.RelationshipIndirect, dev) + } } func packageID(name, version string) string { diff --git a/pkg/fanal/analyzer/language/nodejs/yarn/yarn.go b/pkg/fanal/analyzer/language/nodejs/yarn/yarn.go index b88767afbc6a..4d77b8601bfd 100644 --- a/pkg/fanal/analyzer/language/nodejs/yarn/yarn.go +++ b/pkg/fanal/analyzer/language/nodejs/yarn/yarn.go @@ -10,14 +10,12 @@ import ( "path" "path/filepath" "regexp" - "sort" "strings" "github.com/hashicorp/go-multierror" "github.com/samber/lo" "golang.org/x/xerrors" - "github.com/aquasecurity/trivy/pkg/dependency" "github.com/aquasecurity/trivy/pkg/dependency/parser/nodejs/packagejson" "github.com/aquasecurity/trivy/pkg/dependency/parser/nodejs/yarn" "github.com/aquasecurity/trivy/pkg/detector/library/compare/npm" @@ -47,7 +45,7 @@ var fragmentRegexp = regexp.MustCompile(`(\S+):(@?.*?)(@(.*?)|)$`) type yarnAnalyzer struct { logger *log.Logger packageJsonParser *packagejson.Parser - lockParser language.Parser + lockParser *yarn.Parser comparer npm.Comparer license *license.License } @@ -70,8 +68,18 @@ func (a yarnAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysis } err := fsutils.WalkDir(input.FS, ".", required, func(filePath string, d fs.DirEntry, r io.Reader) error { + // Detect direct and direct dev dependencies to use them when parsing yarn lock file + packageJsonPath := path.Join(path.Dir(filePath), types.NpmPkg) + directDeps, directDevDeps, err := a.parsePackageJsonDependencies(input.FS, packageJsonPath) + if errors.Is(err, fs.ErrNotExist) { + a.logger.Debug("package.json not found", log.FilePath(packageJsonPath)) + } else if err != nil { + a.logger.Warn("Unable to parse package.json to remove dev dependencies", + log.FilePath(packageJsonPath), log.Err(err)) + } + // Parse yarn.lock - app, err := a.parseYarnLock(filePath, r) + app, err := a.parseYarnLock(filePath, r, directDeps, directDevDeps) if err != nil { return xerrors.Errorf("parse error: %w", err) } else if app == nil { @@ -83,12 +91,6 @@ func (a yarnAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysis a.logger.Debug("Unable to traverse licenses", log.Err(err)) } - // Parse package.json alongside yarn.lock to find direct deps and mark dev deps - if err = a.analyzeDependencies(input.FS, path.Dir(filePath), app); err != nil { - a.logger.Warn("Unable to parse package.json to remove dev dependencies", - log.FilePath(path.Join(path.Dir(filePath), types.NpmPkg)), log.Err(err)) - } - // Fill licenses for i, lib := range app.Packages { if l, ok := licenses[lib.ID]; ok { @@ -152,128 +154,21 @@ func (a yarnAnalyzer) Version() int { return version } -func (a yarnAnalyzer) parseYarnLock(filePath string, r io.Reader) (*types.Application, error) { - return language.Parse(types.Yarn, filePath, r, a.lockParser) +type pkgJsonDeps struct { + directDeps map[string]string + directDevDeps map[string]string } -// analyzeDependencies analyzes the package.json file next to yarn.lock, -// distinguishing between direct and transitive dependencies as well as production and development dependencies. -func (a yarnAnalyzer) analyzeDependencies(fsys fs.FS, dir string, app *types.Application) error { - packageJsonPath := path.Join(dir, types.NpmPkg) - directDeps, directDevDeps, err := a.parsePackageJsonDependencies(fsys, packageJsonPath) - if errors.Is(err, fs.ErrNotExist) { - a.logger.Debug("package.json not found", log.FilePath(packageJsonPath)) - return nil - } else if err != nil { - return xerrors.Errorf("unable to parse %s: %w", dir, err) - } - - // yarn.lock file can contain same packages with different versions - // save versions separately for version comparison by comparator - pkgIDs := lo.SliceToMap(app.Packages, func(pkg types.Package) (string, types.Package) { - return pkg.ID, pkg - }) - - // Walk prod dependencies - pkgs, err := a.walkDependencies(app.Packages, pkgIDs, directDeps, false) - if err != nil { - return xerrors.Errorf("unable to walk dependencies: %w", err) - } - - // Walk dev dependencies - devPkgs, err := a.walkDependencies(app.Packages, pkgIDs, directDevDeps, true) - if err != nil { - return xerrors.Errorf("unable to walk dependencies: %w", err) - } - - // Merge prod and dev dependencies. - // If the same package is found in both prod and dev dependencies, use the one in prod. - pkgs = lo.Assign(devPkgs, pkgs) - - pkgSlice := lo.MapToSlice(pkgs, func(_ string, pkg types.Package) types.Package { - // Use Trivy ID format for dependencies with `latest` version in `ID` (`version` field contains the correct version) - if verFromID(pkg.ID) == latestVersion { - pkg.ID = dependency.ID(types.Yarn, pkg.Name, pkg.Version) - } - return pkg - }) - sort.Sort(types.Packages(pkgSlice)) - - // Save packages - app.Packages = pkgSlice - return nil +func (d pkgJsonDeps) Parse(r xio.ReadSeekerAt) ([]types.Package, []types.Dependency, error) { + return yarn.NewParser().Parse(r, d.directDeps, d.directDevDeps) } -func (a yarnAnalyzer) walkDependencies(pkgs []types.Package, pkgIDs map[string]types.Package, - directDeps map[string]string, dev bool) (map[string]types.Package, error) { - - // Identify direct dependencies - directPkgs := make(map[string]types.Package) - for _, pkg := range pkgs { - constraint, ok := directDeps[pkg.Name] - if !ok { - continue - } - - if constraint == latestVersion { - // pkgID with `latest` version uses `@latest` format. - if verFromID(pkg.ID) != latestVersion { - continue - } - } else { - // Handle aliases - // cf. https://classic.yarnpkg.com/lang/en/docs/cli/add/#toc-yarn-add-alias - if m := fragmentRegexp.FindStringSubmatch(constraint); len(m) == 5 { - pkg.Name = m[2] // original name - constraint = m[4] - } - - // npm has own comparer to compare versions - if match, err := a.comparer.MatchVersion(pkg.Version, constraint); err != nil { - return nil, xerrors.Errorf("unable to match version for %s", pkg.Name) - } else if !match { - continue - } - } - - // Mark as a direct dependency - pkg.Indirect = false - pkg.Relationship = types.RelationshipDirect - pkg.Dev = dev - directPkgs[pkg.ID] = pkg - - } - - // Walk indirect dependencies - for _, pkg := range directPkgs { - a.walkIndirectDependencies(pkg, pkgIDs, directPkgs) - } - - return directPkgs, nil -} - -func verFromID(id string) string { - _, ver, _ := strings.Cut(id, "@") - return ver -} - -func (a yarnAnalyzer) walkIndirectDependencies(pkg types.Package, pkgIDs, deps map[string]types.Package) { - for _, pkgID := range pkg.DependsOn { - if _, ok := deps[pkgID]; ok { - continue - } - - dep, ok := pkgIDs[pkgID] - if !ok { - continue - } - - dep.Indirect = true - dep.Relationship = types.RelationshipIndirect - dep.Dev = pkg.Dev - deps[dep.ID] = dep - a.walkIndirectDependencies(dep, pkgIDs, deps) +func (a yarnAnalyzer) parseYarnLock(filePath string, r io.Reader, directDeps, directDevDeps map[string]string) (*types.Application, error) { + pkgJson := pkgJsonDeps{ + directDeps: directDeps, + directDevDeps: directDevDeps, } + return language.Parse(types.Yarn, filePath, r, pkgJson) } func (a yarnAnalyzer) parsePackageJsonDependencies(fsys fs.FS, filePath string) (map[string]string, map[string]string, error) {