From 64812062aa3678c1239b698aebca896619da6e74 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 19 Aug 2022 11:21:45 +0100 Subject: [PATCH] Prevent symlinks causing duplicate package-file relationships As symlinks are traversed as part of file resolution, a scenario in which a package owns both a file and its respective symlinks causes multiple relationships to be created between the package and the file (as the symlinks do not appear in the list of files in the output). We prevent these files from being confused with each other by de-duplicating the files at the point of creating ownerships, and removing duplicate coordinates. This ensures we only get a single copy of each relationship. Signed-off-by: Justin Chadwell --- syft/pkg/cataloger/catalog.go | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index fa0e4d72da03..c9bd4f51a4f4 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -110,29 +110,36 @@ func packageFileOwnershipRelationships(p pkg.Package, resolver source.FilePathRe return nil, nil } - var relationships []artifact.Relationship + locations := map[artifact.ID]source.Location{} for _, path := range fileOwner.OwnedFiles() { - locations, err := resolver.FilesByPath(path) + pathRefs, err := resolver.FilesByPath(path) if err != nil { return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) } - if len(locations) == 0 { + if len(pathRefs) == 0 { // ideally we want to warn users about missing files from a package, however, it is very common for // container image authors to delete files that are not needed in order to keep image sizes small. Adding // a warning here would be needlessly noisy (even for popular base images). 
continue } - for _, l := range locations { - relationships = append(relationships, artifact.Relationship{ - From: p, - To: l.Coordinates, - Type: artifact.ContainsRelationship, - }) + for _, ref := range pathRefs { + if oldRef, ok := locations[ref.Coordinates.ID()]; ok { + log.Debugf("found path duplicate of %s", oldRef.RealPath) + } + locations[ref.Coordinates.ID()] = ref } } + var relationships []artifact.Relationship + for _, location := range locations { + relationships = append(relationships, artifact.Relationship{ + From: p, + To: location.Coordinates, + Type: artifact.ContainsRelationship, + }) + } return relationships, nil }