diff --git a/go.mod b/go.mod index 12d924783f3..f5d644baadc 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b github.com/anchore/packageurl-go v0.0.0-20210922164639-b3fa992ebd29 - github.com/anchore/stereoscope v0.0.0-20220214165125-25ebd49a842b + github.com/anchore/stereoscope v0.0.0-20220217141419-c6f02aed9ed2 github.com/antihax/optional v1.0.0 github.com/bmatcuk/doublestar/v4 v4.0.2 github.com/docker/docker v20.10.12+incompatible diff --git a/go.sum b/go.sum index 863c3227bf7..fa0811fc4e1 100644 --- a/go.sum +++ b/go.sum @@ -282,8 +282,8 @@ github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZV github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E= github.com/anchore/packageurl-go v0.0.0-20210922164639-b3fa992ebd29 h1:K9LfnxwhqvihqU0+MF325FNy7fsKV9EGaUxdfR4gnWk= github.com/anchore/packageurl-go v0.0.0-20210922164639-b3fa992ebd29/go.mod h1:Oc1UkGaJwY6ND6vtAqPSlYrptKRJngHwkwB6W7l1uP0= -github.com/anchore/stereoscope v0.0.0-20220214165125-25ebd49a842b h1:PMMXpTEHVVLErrXQ6mH9ocLAQyvQu/LUhdstrhx7AC4= -github.com/anchore/stereoscope v0.0.0-20220214165125-25ebd49a842b/go.mod h1:QpDHHV2h1NNfu7klzU75XC8RvSlaPK6HHgi0dy8A6sk= +github.com/anchore/stereoscope v0.0.0-20220217141419-c6f02aed9ed2 h1:QuvMG+rqqJmtFRL+jqj5pFgjQcJSnEHEbtj1lKowLLQ= +github.com/anchore/stereoscope v0.0.0-20220217141419-c6f02aed9ed2/go.mod h1:QpDHHV2h1NNfu7klzU75XC8RvSlaPK6HHgi0dy8A6sk= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/brotli v1.0.0/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= diff --git a/internal/err_helper.go b/internal/err_helper.go index 
dad5f9c3dcd..132b60f89c2 100644 --- a/internal/err_helper.go +++ b/internal/err_helper.go @@ -16,12 +16,13 @@ func CloseAndLogError(closer io.Closer, location string) { } type ErrPath struct { - Path string - Err error + Context string + Path string + Err error } func (e ErrPath) Error() string { - return fmt.Sprintf("unable to observe contents of %+v: %v", e.Path, e.Err) + return fmt.Sprintf("%s unable to observe contents of %+v: %v", e.Context, e.Path, e.Err) } func IsErrPath(err error) bool { diff --git a/syft/file/all_regular_files.go b/syft/file/all_regular_files.go new file mode 100644 index 00000000000..e7612c8b242 --- /dev/null +++ b/syft/file/all_regular_files.go @@ -0,0 +1,30 @@ +package file + +import ( + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/source" +) + +func allRegularFiles(resolver source.FileResolver) (locations []source.Location) { + for location := range resolver.AllLocations() { + resolvedLocations, err := resolver.FilesByPath(location.RealPath) + if err != nil { + log.Warnf("unable to resolve %+v: %+v", location, err) + continue + } + + for _, resolvedLocation := range resolvedLocations { + metadata, err := resolver.FileMetadataByLocation(resolvedLocation) + if err != nil { + log.Warnf("unable to get metadata for %+v: %+v", location, err) + continue + } + + if metadata.Type != source.RegularFile { + continue + } + locations = append(locations, resolvedLocation) + } + } + return locations +} diff --git a/syft/file/all_regular_files_test.go b/syft/file/all_regular_files_test.go new file mode 100644 index 00000000000..aad33b3487b --- /dev/null +++ b/syft/file/all_regular_files_test.go @@ -0,0 +1,74 @@ +package file + +import ( + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft/source" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "testing" +) + +func Test_allRegularFiles(t *testing.T) { + type access struct { + 
realPath string + virtualPath string + } + tests := []struct { + name string + setup func() source.FileResolver + wantRealPaths *strset.Set + wantVirtualPaths *strset.Set + }{ + { + name: "image", + setup: func() source.FileResolver { + testImage := "image-file-type-mix" + + if *updateImageGoldenFiles { + imagetest.UpdateGoldenFixtureImage(t, testImage) + } + + img := imagetest.GetGoldenFixtureImage(t, testImage) + + s, err := source.NewFromImage(img, "---") + require.NoError(t, err) + + r, err := s.FileResolver(source.SquashedScope) + require.NoError(t, err) + + return r + }, + wantRealPaths: strset.New("/file-1.txt"), + wantVirtualPaths: strset.New("/file-1.txt", "/symlink-1", "/hardlink-1"), + }, + { + name: "directory", + setup: func() source.FileResolver { + s, err := source.NewFromDirectory("test-fixtures/symlinked-root/nested/link-root") + require.NoError(t, err) + r, err := s.FileResolver(source.SquashedScope) + require.NoError(t, err) + return r + }, + wantRealPaths: strset.New("file1.txt", "nested/file2.txt"), + wantVirtualPaths: strset.New("nested/linked-file1.txt"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resolver := tt.setup() + locations := allRegularFiles(resolver) + realLocations := strset.New() + virtualLocations := strset.New() + for _, l := range locations { + realLocations.Add(l.RealPath) + if l.VirtualPath != "" { + virtualLocations.Add(l.VirtualPath) + } + } + assert.ElementsMatch(t, tt.wantRealPaths.List(), realLocations.List(), "mismatched real paths") + assert.ElementsMatch(t, tt.wantVirtualPaths.List(), virtualLocations.List(), "mismatched virtual paths") + }) + } +} diff --git a/syft/file/classification_cataloger.go b/syft/file/classification_cataloger.go index 325db0e12a0..01a0685efe2 100644 --- a/syft/file/classification_cataloger.go +++ b/syft/file/classification_cataloger.go @@ -19,11 +19,12 @@ func (i *ClassificationCataloger) Catalog(resolver source.FileResolver) (map[sou results := 
make(map[source.Coordinates][]Classification) numResults := 0 - for location := range resolver.AllLocations() { + for _, location := range allRegularFiles(resolver) { for _, classifier := range i.classifiers { result, err := classifier.Classify(resolver, location) if err != nil { - return nil, err + log.Warnf("file classification cataloger failed with class=%q at location=%+v: %+v", classifier.Class, location, err) + continue } if result != nil { results[location.Coordinates] = append(results[location.Coordinates], *result) diff --git a/syft/file/classification_cataloger_test.go b/syft/file/classification_cataloger_test.go index 365dafd0fc9..da6fb37cb7b 100644 --- a/syft/file/classification_cataloger_test.go +++ b/syft/file/classification_cataloger_test.go @@ -1,6 +1,7 @@ package file import ( + "github.com/anchore/stereoscope/pkg/imagetest" "testing" "github.com/anchore/syft/syft/source" @@ -88,7 +89,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { { name: "positive-busybox", fixtureDir: "test-fixtures/classifiers/positive", - location: "busybox", + location: "[", // note: busybox is a link to [ expected: []Classification{ { Class: "busybox-binary", @@ -116,13 +117,67 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { actualResults, err := c.Catalog(resolver) test.expectedErr(t, err) - loc := source.NewLocation(test.location) + ok := false + for actualLoc, actualClassification := range actualResults { + if test.location == actualLoc.RealPath { + ok = true + assert.Equal(t, test.expected, actualClassification) + } + } + + if !ok { + t.Fatalf("could not find test location=%q", test.location) + } + + }) + } +} + +func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { + tests := []struct { + name string + fixtureImage string + location string + expected []Classification + expectedErr func(assert.TestingT, error, ...interface{}) bool + }{ + { + name: "busybox-regression", + 
fixtureImage: "image-busybox", + location: "/bin/[", + expected: []Classification{ + { + Class: "busybox-binary", + Metadata: map[string]string{ + "version": "1.35.0", + }, + }, + }, + expectedErr: assert.NoError, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + c, err := NewClassificationCataloger(DefaultClassifiers) + test.expectedErr(t, err) + + img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage) + src, err := source.NewFromImage(img, "test-img") + test.expectedErr(t, err) + + resolver, err := src.FileResolver(source.SquashedScope) + test.expectedErr(t, err) + + actualResults, err := c.Catalog(resolver) + test.expectedErr(t, err) ok := false - for actual_loc, actual_classification := range actualResults { - if loc.RealPath == actual_loc.RealPath { + for actuaLoc, actualClassification := range actualResults { + if actuaLoc.RealPath == test.location { ok = true - assert.Equal(t, test.expected, actual_classification) + assert.Equal(t, test.expected, actualClassification) } } diff --git a/syft/file/contents_cataloger.go b/syft/file/contents_cataloger.go index 8ef0a6615ba..b65a042841c 100644 --- a/syft/file/contents_cataloger.go +++ b/syft/file/contents_cataloger.go @@ -3,6 +3,7 @@ package file import ( "bytes" "encoding/base64" + "fmt" "io" "github.com/anchore/syft/internal" @@ -66,9 +67,12 @@ func (i *ContentsCataloger) catalogLocation(resolver source.FileResolver, locati buf := &bytes.Buffer{} encoder := base64.NewEncoder(base64.StdEncoding, buf) if _, err = io.Copy(encoder, contentReader); err != nil { - return "", internal.ErrPath{Path: location.RealPath, Err: err} + return "", internal.ErrPath{Context: "contents-cataloger", Path: location.RealPath, Err: err} + } + // note: it's important to close the encoder before reading from the buffer since closing will flush the remaining bytes + if err := encoder.Close(); err != nil { + return "", fmt.Errorf("unable to close base64 encoder: %w", err) } -
encoder.Close() return buf.String(), nil } diff --git a/syft/file/digest_cataloger.go b/syft/file/digest_cataloger.go index 502173a23f6..cb80f1a85a4 100644 --- a/syft/file/digest_cataloger.go +++ b/syft/file/digest_cataloger.go @@ -2,6 +2,7 @@ package file import ( "crypto" + "errors" "fmt" "hash" "io" @@ -19,6 +20,8 @@ import ( "github.com/anchore/syft/syft/source" ) +var errUndigestableFile = errors.New("undigestable file") + type DigestsCataloger struct { hashes []crypto.Hash } @@ -31,16 +34,18 @@ func NewDigestsCataloger(hashes []crypto.Hash) (*DigestsCataloger, error) { func (i *DigestsCataloger) Catalog(resolver source.FileResolver) (map[source.Coordinates][]Digest, error) { results := make(map[source.Coordinates][]Digest) - var locations []source.Location - for location := range resolver.AllLocations() { - locations = append(locations, location) - } + locations := allRegularFiles(resolver) stage, prog := digestsCatalogingProgress(int64(len(locations))) for _, location := range locations { stage.Current = location.RealPath result, err := i.catalogLocation(resolver, location) + + if errors.Is(err, errUndigestableFile) { + continue + } + if internal.IsErrPathPermission(err) { - log.Debugf("file digests cataloger skipping - %+v", err) + log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err) continue } @@ -56,6 +61,16 @@ func (i *DigestsCataloger) Catalog(resolver source.FileResolver) (map[source.Coo } func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, location source.Location) ([]Digest, error) { + meta, err := resolver.FileMetadataByLocation(location) + if err != nil { + return nil, err + } + + // we should only attempt to report digests for files that are regular files (don't attempt to resolve links) + if meta.Type != source.RegularFile { + return nil, errUndigestableFile + } + contentReader, err := resolver.FileContentsByLocation(location) if err != nil { return nil, err @@ -72,7 +87,7 @@ func (i 
*DigestsCataloger) catalogLocation(resolver source.FileResolver, locatio size, err := io.Copy(io.MultiWriter(writers...), contentReader) if err != nil { - return nil, internal.ErrPath{Path: location.RealPath, Err: err} + return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err} } if size == 0 { diff --git a/syft/file/digest_cataloger_test.go b/syft/file/digest_cataloger_test.go index ba06e408549..2779215089d 100644 --- a/syft/file/digest_cataloger_test.go +++ b/syft/file/digest_cataloger_test.go @@ -3,8 +3,10 @@ package file import ( "crypto" "fmt" + "github.com/stretchr/testify/require" "io/ioutil" "os" + "path/filepath" "testing" "github.com/anchore/stereoscope/pkg/file" @@ -16,11 +18,11 @@ import ( "github.com/anchore/syft/syft/source" ) -func testDigests(t testing.TB, files []string, hashes ...crypto.Hash) map[source.Coordinates][]Digest { +func testDigests(t testing.TB, root string, files []string, hashes ...crypto.Hash) map[source.Coordinates][]Digest { digests := make(map[source.Coordinates][]Digest) for _, f := range files { - fh, err := os.Open(f) + fh, err := os.Open(filepath.Join(root, f)) if err != nil { t.Fatalf("could not open %q : %+v", f, err) } @@ -29,6 +31,12 @@ func testDigests(t testing.TB, files []string, hashes ...crypto.Hash) map[source t.Fatalf("could not read %q : %+v", f, err) } + if len(b) == 0 { + // we don't keep digests for empty files + digests[source.NewLocation(f).Coordinates] = []Digest{} + continue + } + for _, hash := range hashes { h := hash.New() h.Write(b) @@ -42,55 +50,43 @@ func testDigests(t testing.TB, files []string, hashes ...crypto.Hash) map[source return digests } -func TestDigestsCataloger_SimpleContents(t *testing.T) { - regularFiles := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"} +func TestDigestsCataloger(t *testing.T) { tests := []struct { - name string - digests []crypto.Hash - files []string - expected 
map[source.Coordinates][]Digest - catalogErr bool + name string + digests []crypto.Hash + files []string + expected map[source.Coordinates][]Digest }{ { name: "md5", digests: []crypto.Hash{crypto.MD5}, - files: regularFiles, - expected: testDigests(t, regularFiles, crypto.MD5), + files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"}, + expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5), }, { name: "md5-sha1-sha256", digests: []crypto.Hash{crypto.MD5, crypto.SHA1, crypto.SHA256}, - files: regularFiles, - expected: testDigests(t, regularFiles, crypto.MD5, crypto.SHA1, crypto.SHA256), - }, - { - name: "directory returns error", - digests: []crypto.Hash{crypto.MD5}, - files: []string{"test-fixtures/last"}, - catalogErr: true, + files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"}, + expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5, crypto.SHA1, crypto.SHA256), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { c, err := NewDigestsCataloger(test.digests) - if err != nil { - t.Fatalf("could not create cataloger: %+v", err) - } + require.NoError(t, err) - resolver := source.NewMockResolverForPaths(test.files...) 
- actual, err := c.Catalog(resolver) - if err != nil && !test.catalogErr { - t.Fatalf("could not catalog (but should have been able to): %+v", err) - } else if err == nil && test.catalogErr { - t.Fatalf("expected catalog error but did not get one") - } else if test.catalogErr && err != nil { - return - } + src, err := source.NewFromDirectory("test-fixtures/last/") + require.NoError(t, err) - assert.Equal(t, actual, test.expected, "mismatched digests") + resolver, err := src.FileResolver(source.SquashedScope) + require.NoError(t, err) + + actual, err := c.Catalog(resolver) + require.NoError(t, err) + assert.Equal(t, test.expected, actual, "mismatched digests") }) } } diff --git a/syft/file/metadata_cataloger_test.go b/syft/file/metadata_cataloger_test.go index b4aae78fc7e..9ca27a6d014 100644 --- a/syft/file/metadata_cataloger_test.go +++ b/syft/file/metadata_cataloger_test.go @@ -66,7 +66,7 @@ func TestFileMetadataCataloger(t *testing.T) { LinkDestination: "file-1.txt", UserID: 1, GroupID: 2, - MIMEType: "text/plain", + MIMEType: "", }, }, { @@ -78,7 +78,7 @@ func TestFileMetadataCataloger(t *testing.T) { LinkDestination: "file-1.txt", UserID: 0, GroupID: 0, - MIMEType: "text/plain", + MIMEType: "", }, }, { @@ -89,7 +89,7 @@ func TestFileMetadataCataloger(t *testing.T) { Type: "CharacterDevice", UserID: 0, GroupID: 0, - MIMEType: "text/plain", + MIMEType: "", }, }, { @@ -100,7 +100,7 @@ func TestFileMetadataCataloger(t *testing.T) { Type: "BlockDevice", UserID: 0, GroupID: 0, - MIMEType: "text/plain", + MIMEType: "", }, }, { @@ -111,7 +111,7 @@ func TestFileMetadataCataloger(t *testing.T) { Type: "FIFONode", UserID: 0, GroupID: 0, - MIMEType: "text/plain", + MIMEType: "", }, }, { @@ -122,7 +122,7 @@ func TestFileMetadataCataloger(t *testing.T) { Type: "Directory", UserID: 0, GroupID: 0, - MIMEType: "text/plain", + MIMEType: "", }, }, } diff --git a/syft/file/secrets_cataloger.go b/syft/file/secrets_cataloger.go index 37ec20a746c..b8f31980ea9 100644 --- 
a/syft/file/secrets_cataloger.go +++ b/syft/file/secrets_cataloger.go @@ -42,10 +42,7 @@ func NewSecretsCataloger(patterns map[string]*regexp.Regexp, revealValues bool, func (i *SecretsCataloger) Catalog(resolver source.FileResolver) (map[source.Coordinates][]SearchResult, error) { results := make(map[source.Coordinates][]SearchResult) - var locations []source.Location - for location := range resolver.AllLocations() { - locations = append(locations, location) - } + locations := allRegularFiles(resolver) stage, prog, secretsDiscovered := secretsCatalogingProgress(int64(len(locations))) for _, location := range locations { stage.Current = location.RealPath @@ -75,6 +72,10 @@ func (i *SecretsCataloger) catalogLocation(resolver source.FileResolver, locatio return nil, err } + if metadata.Size == 0 { + return nil, nil + } + if i.skipFilesAboveSize > 0 && metadata.Size > i.skipFilesAboveSize { return nil, nil } @@ -82,7 +83,7 @@ func (i *SecretsCataloger) catalogLocation(resolver source.FileResolver, locatio // TODO: in the future we can swap out search strategies here secrets, err := catalogLocationByLine(resolver, location, i.patterns) if err != nil { - return nil, internal.ErrPath{Path: location.RealPath, Err: err} + return nil, internal.ErrPath{Context: "secrets-cataloger", Path: location.RealPath, Err: err} } if i.revealValues { diff --git a/syft/file/test-fixtures/classifiers/positive/[ b/syft/file/test-fixtures/classifiers/positive/[ new file mode 100644 index 00000000000..7829d71b941 --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/[ @@ -0,0 +1,3 @@ +# note: this SHOULD match as busybox 3.33.3 + +noise!BusyBox v3.33.3!noise \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/busybox b/syft/file/test-fixtures/classifiers/positive/busybox deleted file mode 100644 index 7829d71b941..00000000000 --- a/syft/file/test-fixtures/classifiers/positive/busybox +++ /dev/null @@ -1,3 +0,0 @@ -# note: this SHOULD match as 
busybox 3.33.3 - -noise!BusyBox v3.33.3!noise \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/busybox b/syft/file/test-fixtures/classifiers/positive/busybox new file mode 120000 index 00000000000..c3e3150b864 --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/busybox @@ -0,0 +1 @@ +./[ \ No newline at end of file diff --git a/syft/file/test-fixtures/image-busybox/Dockerfile b/syft/file/test-fixtures/image-busybox/Dockerfile new file mode 100644 index 00000000000..94b54d2f4fe --- /dev/null +++ b/syft/file/test-fixtures/image-busybox/Dockerfile @@ -0,0 +1 @@ +FROM busybox:1.35 \ No newline at end of file diff --git a/syft/file/test-fixtures/image-file-type-mix/Dockerfile b/syft/file/test-fixtures/image-file-type-mix/Dockerfile index d8f72858797..c2d61ef4da9 100644 --- a/syft/file/test-fixtures/image-file-type-mix/Dockerfile +++ b/syft/file/test-fixtures/image-file-type-mix/Dockerfile @@ -9,3 +9,5 @@ RUN ln file-1.txt hardlink-1 RUN mknod char-device-1 c 89 1 RUN mknod block-device-1 b 0 1 RUN mknod fifo-1 p +RUN mkdir /dir +RUN rm -rf home etc/group etc/localtime etc/mtab etc/network etc/passwd etc/shadow var usr bin/* \ No newline at end of file diff --git a/syft/file/test-fixtures/last/empty/empty b/syft/file/test-fixtures/last/empty/empty new file mode 100644 index 00000000000..e69de29bb2d diff --git a/syft/file/test-fixtures/snapshot/stereoscope-fixture-image-file-type-mix.golden b/syft/file/test-fixtures/snapshot/stereoscope-fixture-image-file-type-mix.golden index 8acd4584ca2..e85036214d9 100644 Binary files a/syft/file/test-fixtures/snapshot/stereoscope-fixture-image-file-type-mix.golden and b/syft/file/test-fixtures/snapshot/stereoscope-fixture-image-file-type-mix.golden differ diff --git a/syft/file/test-fixtures/symlinked-root/nested/link-root b/syft/file/test-fixtures/symlinked-root/nested/link-root new file mode 120000 index 00000000000..24659224aae --- /dev/null +++ 
b/syft/file/test-fixtures/symlinked-root/nested/link-root @@ -0,0 +1 @@ +../real-root \ No newline at end of file diff --git a/syft/file/test-fixtures/symlinked-root/real-root/file1.txt b/syft/file/test-fixtures/symlinked-root/real-root/file1.txt new file mode 100644 index 00000000000..5452844a200 --- /dev/null +++ b/syft/file/test-fixtures/symlinked-root/real-root/file1.txt @@ -0,0 +1 @@ +contents! diff --git a/syft/file/test-fixtures/symlinked-root/real-root/nested/file2.txt b/syft/file/test-fixtures/symlinked-root/real-root/nested/file2.txt new file mode 100644 index 00000000000..5f7e2f21348 --- /dev/null +++ b/syft/file/test-fixtures/symlinked-root/real-root/nested/file2.txt @@ -0,0 +1 @@ +more contents! diff --git a/syft/file/test-fixtures/symlinked-root/real-root/nested/linked-file1.txt b/syft/file/test-fixtures/symlinked-root/real-root/nested/linked-file1.txt new file mode 120000 index 00000000000..4e7feb2d8cb --- /dev/null +++ b/syft/file/test-fixtures/symlinked-root/real-root/nested/linked-file1.txt @@ -0,0 +1 @@ +../file1.txt \ No newline at end of file diff --git a/syft/source/all_layers_resolver.go b/syft/source/all_layers_resolver.go index 02efd625850..4293dc825b7 100644 --- a/syft/source/all_layers_resolver.go +++ b/syft/source/all_layers_resolver.go @@ -126,7 +126,7 @@ func (r *allLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) for _, pattern := range patterns { for idx, layerIdx := range r.layers { - results, err := r.img.Layers[layerIdx].Tree.FilesByGlob(pattern, filetree.DoNotFollowDeadBasenameLinks) + results, err := r.img.Layers[layerIdx].Tree.FilesByGlob(pattern, filetree.FollowBasenameLinks, filetree.DoNotFollowDeadBasenameLinks) if err != nil { return nil, fmt.Errorf("failed to resolve files by glob (%s): %w", pattern, err) } @@ -184,6 +184,22 @@ func (r *allLayersResolver) RelativeFileByPath(location Location, path string) * // FileContentsByLocation fetches file contents for a single file reference, irregardless of the 
source layer. // If the path does not exist an error is returned. func (r *allLayersResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { + entry, err := r.img.FileCatalog.Get(location.ref) + if err != nil { + return nil, fmt.Errorf("unable to get metadata for path=%q from file catalog: %w", location.RealPath, err) + } + + switch entry.Metadata.TypeFlag { + case tar.TypeSymlink, tar.TypeLink: + // the location we are searching may be a symlink, we should always work with the resolved file + newLocation := r.RelativeFileByPath(location, location.VirtualPath) + if newLocation == nil { + // this is a dead link + return nil, fmt.Errorf("no contents for location=%q", location.VirtualPath) + } + location = *newLocation + } + return r.img.FileContentsByRef(location.ref) } diff --git a/syft/source/all_layers_resolver_test.go b/syft/source/all_layers_resolver_test.go index d6c167e5f86..e9e0780125f 100644 --- a/syft/source/all_layers_resolver_test.go +++ b/syft/source/all_layers_resolver_test.go @@ -1,6 +1,8 @@ package source import ( + "github.com/stretchr/testify/require" + "io" "testing" "github.com/stretchr/testify/assert" @@ -301,3 +303,241 @@ func Test_imageAllLayersResolver_hasFilesystemIDInLocation(t *testing.T) { } } + +func TestAllLayersImageResolver_FilesContents(t *testing.T) { + + tests := []struct { + name string + fixture string + contents []string + }{ + { + name: "one degree", + fixture: "link-2", + contents: []string{ + "file 2!", // from the first resolved layer's perspective + "NEW file override!", // from the second resolved layers perspective + }, + }, + { + name: "two degrees", + fixture: "link-indirect", + contents: []string{ + "file 2!", + "NEW file override!", + }, + }, + { + name: "dead link", + fixture: "link-dead", + contents: []string{}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := 
newAllLayersResolver(img) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.fixture) + require.NoError(t, err) + + // the given path should have an overridden file + require.Len(t, refs, len(test.contents)) + + for idx, loc := range refs { + reader, err := resolver.FileContentsByLocation(loc) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + assert.Equal(t, test.contents[idx], string(actual)) + } + + }) + } +} + +func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { + tests := []struct { + name string + runner func(FileResolver) []Location + expected []Location + }{ + { + name: "by mimetype", + runner: func(resolver FileResolver) []Location { + // links should not show up when searching mimetype + actualLocations, err := resolver.FilesByMIMEType("text/plain") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/etc/group", + }, + VirtualPath: "/etc/group", + }, + { + Coordinates: Coordinates{ + RealPath: "/etc/passwd", + }, + VirtualPath: "/etc/passwd", + }, + { + Coordinates: Coordinates{ + RealPath: "/etc/shadow", + }, + VirtualPath: "/etc/shadow", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/file-1.txt", + }, + // copy 1 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/file-3.txt", + }, + // copy 2 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + // copy 1 + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + // copy 2 + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + }, + }, + { + name: "by glob", + runner: func(resolver FileResolver) []Location { + // links are searched, but 
resolve to the real files + actualLocations, err := resolver.FilesByGlob("*ink-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/link-1", + }, + // copy 1 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + }, + // copy 2 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/link-within", + }, + }, + }, + { + name: "by path to degree 1 link", + runner: func(resolver FileResolver) []Location { + // links resolve to the final file + actualLocations, err := resolver.FilesByPath("/link-2") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // we have multiple copies across layers + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + }, + }, + }, + { + name: "by path to degree 2 link", + runner: func(resolver FileResolver) []Location { + // multiple links resolves to the final file + actualLocations, err := resolver.FilesByPath("/link-indirect") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // we have multiple copies across layers + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-indirect", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-indirect", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := newAllLayersResolver(img) + assert.NoError(t, err) + + actualLocations := test.runner(resolver) + assert.Len(t, actualLocations, len(test.expected)) + for i, actual := range actualLocations { + 
assert.Equal(t, test.expected[i].RealPath, actual.RealPath) + assert.Equal(t, test.expected[i].VirtualPath, actual.VirtualPath) + } + }) + } + +} diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index 6a3fe31e69b..0691defb43c 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -48,24 +48,35 @@ type directoryResolver struct { } func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) { - currentWd, err := os.Getwd() + currentWD, err := os.Getwd() if err != nil { - return nil, fmt.Errorf("could not create directory resolver: %w", err) + return nil, fmt.Errorf("could not gret CWD: %w", err) + } + // we have to account for the root being accessed through a symlink path and always resolve the real path. Otherwise + // we will not be able to normalize given paths that fall under the resolver + cleanCWD, err := filepath.EvalSymlinks(currentWD) + if err != nil { + return nil, fmt.Errorf("could not evaluate CWD symlinks: %w", err) + } + + cleanRoot, err := filepath.EvalSymlinks(root) + if err != nil { + return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err) } var currentWdRelRoot string - if path.IsAbs(root) { - currentWdRelRoot, err = filepath.Rel(currentWd, root) + if path.IsAbs(cleanRoot) { + currentWdRelRoot, err = filepath.Rel(cleanCWD, cleanRoot) if err != nil { - return nil, fmt.Errorf("could not create directory resolver: %w", err) + return nil, fmt.Errorf("could not determine given root path to CWD: %w", err) } } else { - currentWdRelRoot = filepath.Clean(root) + currentWdRelRoot = filepath.Clean(cleanRoot) } resolver := directoryResolver{ - path: root, - currentWd: currentWd, + path: cleanRoot, + currentWd: cleanCWD, currentWdRelativeToRoot: currentWdRelRoot, fileTree: filetree.NewFileTree(), metadata: make(map[file.ID]FileMetadata), @@ -74,7 +85,7 @@ func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryR 
errPaths: make(map[string]error), } - return &resolver, indexAllRoots(root, resolver.indexTree) + return &resolver, indexAllRoots(cleanRoot, resolver.indexTree) } func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]string, error) { @@ -233,7 +244,9 @@ func (r directoryResolver) addSymlinkToIndex(p string, info os.FileInfo) (string } location := NewLocationFromDirectory(p, *ref) + location.VirtualPath = p metadata := fileMetadataFromPath(p, usedInfo, r.isInIndex(location)) + metadata.LinkDestination = linkTarget r.addFileMetadataToIndex(ref, metadata) return targetAbsPath, nil @@ -305,8 +318,15 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) continue } + // we should be resolving symlinks and preserving this information as a VirtualPath to the real file + evaluatedPath, err := filepath.EvalSymlinks(userStrPath) + if err != nil { + log.Warnf("directory resolver unable to evaluate symlink for path=%q : %+v", userPath, err) + continue + } + // TODO: why not use stored metadata? - fileMeta, err := os.Stat(userStrPath) + fileMeta, err := os.Stat(evaluatedPath) if errors.Is(err, os.ErrNotExist) { // note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform // specific, but essentially hints at the same overall problem (that the path does not exist). Such an @@ -317,7 +337,7 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) // invalid paths. This logging statement is meant to raise IO or permissions related problems. 
var pathErr *os.PathError if !errors.As(err, &pathErr) { - log.Warnf("path is not valid (%s): %+v", userStrPath, err) + log.Warnf("path is not valid (%s): %+v", evaluatedPath, err) } continue } @@ -331,9 +351,14 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) userStrPath = windowsToPosix(userStrPath) } - exists, ref, err := r.fileTree.File(file.Path(userStrPath)) + exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks) if err == nil && exists { - references = append(references, NewLocationFromDirectory(r.responsePath(userStrPath), *ref)) + loc := NewVirtualLocationFromDirectory( + r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root + r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root + *ref, + ) + references = append(references, loc) } } @@ -345,12 +370,17 @@ func (r directoryResolver) FilesByGlob(patterns ...string) ([]Location, error) { result := make([]Location, 0) for _, pattern := range patterns { - globResults, err := r.fileTree.FilesByGlob(pattern) + globResults, err := r.fileTree.FilesByGlob(pattern, filetree.FollowBasenameLinks) if err != nil { return nil, err } for _, globResult := range globResults { - result = append(result, NewLocationFromDirectory(r.responsePath(string(globResult.MatchPath)), globResult.Reference)) + loc := NewVirtualLocationFromDirectory( + r.responsePath(string(globResult.Reference.RealPath)), // the actual path relative to the resolver root + r.responsePath(string(globResult.MatchPath)), // the path used to access this file, relative to the resolver root + globResult.Reference, + ) + result = append(result, loc) } } @@ -404,7 +434,8 @@ func (r *directoryResolver) AllLocations() <-chan Location { results := make(chan Location) go func() { defer close(results) - for _, ref := range r.fileTree.AllFiles() { + // this should be all non-directory types + for _, ref := range 
r.fileTree.AllFiles(file.TypeReg, file.TypeSymlink, file.TypeHardLink, file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFifo) { results <- NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref) } }() diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index a95634aaf3b..c791692de04 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -4,6 +4,7 @@ package source import ( + "io" "io/fs" "io/ioutil" "os" @@ -259,6 +260,45 @@ func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { assert.Equal(t, "image-symlinks/file-1.txt", refs[0].RealPath) } +func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) { + + tests := []struct { + name string + fixture string + }{ + { + name: "one degree", + fixture: "link_to_new_readme", + }, + { + name: "two degrees", + fixture: "link_to_link_to_new_readme", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.fixture) + require.NoError(t, err) + assert.Len(t, refs, 1) + + reader, err := resolver.FileContentsByLocation(refs[0]) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + expected, err := os.ReadFile("test-fixtures/symlinks-simple/readme") + require.NoError(t, err) + + assert.Equal(t, string(expected), string(actual)) + }) + } +} + func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { // let's make certain that "dev/place" is not ignored, since it is not "/dev/place" resolver, err := newDirectoryResolver("test-fixtures/system_paths/target") @@ -583,7 +623,7 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) { func Test_IndexingNestedSymLinks(t *testing.T) { resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") - assert.NoError(t, err) + 
require.NoError(t, err) // check that we can get the real path locations, err := resolver.FilesByPath("./readme") @@ -593,12 +633,41 @@ func Test_IndexingNestedSymLinks(t *testing.T) { // check that we can access the same file via 1 symlink locations, err = resolver.FilesByPath("./link_to_new_readme") require.NoError(t, err) - assert.Len(t, locations, 1) + require.Len(t, locations, 1) + assert.Equal(t, "readme", locations[0].RealPath) + assert.Equal(t, "link_to_new_readme", locations[0].VirtualPath) // check that we can access the same file via 2 symlinks locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") require.NoError(t, err) - assert.Len(t, locations, 1) + require.Len(t, locations, 1) + assert.Equal(t, "readme", locations[0].RealPath) + assert.Equal(t, "link_to_link_to_new_readme", locations[0].VirtualPath) + + // check that a glob matching both symlinks returns one location per link, each resolving to the same real file + locations, err = resolver.FilesByGlob("**/link_*") + require.NoError(t, err) + require.Len(t, locations, 2) + + // returned locations can be in any order + expectedVirtualPaths := []string{ + "link_to_link_to_new_readme", + "link_to_new_readme", + } + + expectedRealPaths := []string{ + "readme", + } + + actualRealPaths := strset.New() + actualVirtualPaths := strset.New() + for _, a := range locations { + actualVirtualPaths.Add(a.VirtualPath) + actualRealPaths.Add(a.RealPath) + } + + assert.ElementsMatch(t, expectedVirtualPaths, actualVirtualPaths.List()) + assert.ElementsMatch(t, expectedRealPaths, actualRealPaths.List()) } func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { @@ -607,38 +676,27 @@ func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { } resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", filterFn) - assert.NoError(t, err) - - var testingLocations []Location + require.NoError(t, err) // the path to the real file is PRUNED from the index, so we should NOT expect a location returned locations, err :=
resolver.FilesByPath("./readme") require.NoError(t, err) assert.Empty(t, locations) - // check that we can access the same file via 1 symlink + // check that we cannot access the file even via symlink locations, err = resolver.FilesByPath("./link_to_new_readme") require.NoError(t, err) - assert.Len(t, locations, 1) - testingLocations = append(testingLocations, locations...) + assert.Empty(t, locations) - // check that we can access the same file via 2 symlinks + // check that we still cannot access the same file via 2 symlinks locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") require.NoError(t, err) - assert.Len(t, locations, 1) - testingLocations = append(testingLocations, locations...) - - // check that we CANNOT get contents from any of the link locations - for _, location := range testingLocations { - contentReader, err := resolver.FileContentsByLocation(location) - assert.Errorf(t, err, "expected an error for getting content from a location not in the index") - assert.Nil(t, contentReader) - } + assert.Empty(t, locations) } func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-roots/root") - assert.NoError(t, err) + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-multiple-roots/root") + require.NoError(t, err) // check that we can get the real path locations, err := resolver.FilesByPath("./readme") @@ -649,6 +707,26 @@ func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { locations, err = resolver.FilesByPath("./link_to_link_to_readme") require.NoError(t, err) assert.Len(t, locations, 1) + + // something looks wrong here + t.Failed() +} + +func Test_RootViaSymlink(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinked-root/nested/link-root") + require.NoError(t, err) + + locations, err := resolver.FilesByPath("./file1.txt") + require.NoError(t, err) + assert.Len(t, locations, 1) + + locations, err = 
resolver.FilesByPath("./nested/file2.txt") + require.NoError(t, err) + assert.Len(t, locations, 1) + + locations, err = resolver.FilesByPath("./nested/linked-file1.txt") + require.NoError(t, err) + assert.Len(t, locations, 1) } func Test_directoryResolver_FileContentsByLocation(t *testing.T) { diff --git a/syft/source/file_metadata.go b/syft/source/file_metadata.go index 37c97d81819..432a1d8f4e0 100644 --- a/syft/source/file_metadata.go +++ b/syft/source/file_metadata.go @@ -63,6 +63,7 @@ func fileMetadataFromPath(path string, info os.FileInfo, withMIMEType bool) File // unsupported across platforms UserID: uid, GroupID: gid, + Size: info.Size(), MIMEType: mimeType, } } diff --git a/syft/source/image_squash_resolver.go b/syft/source/image_squash_resolver.go index 914e546aa63..ba584897eff 100644 --- a/syft/source/image_squash_resolver.go +++ b/syft/source/image_squash_resolver.go @@ -1,6 +1,7 @@ package source import ( + "archive/tar" "fmt" "io" @@ -82,7 +83,7 @@ func (r *imageSquashResolver) FilesByGlob(patterns ...string) ([]Location, error uniqueLocations := make([]Location, 0) for _, pattern := range patterns { - results, err := r.img.SquashedTree().FilesByGlob(pattern) + results, err := r.img.SquashedTree().FilesByGlob(pattern, filetree.FollowBasenameLinks) if err != nil { return nil, fmt.Errorf("failed to resolve files by glob (%s): %w", pattern, err) } @@ -137,6 +138,29 @@ func (r *imageSquashResolver) RelativeFileByPath(_ Location, path string) *Locat // FileContentsByLocation fetches file contents for a single file reference, irregardless of the source layer. // If the path does not exist an error is returned. 
func (r *imageSquashResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { + entry, err := r.img.FileCatalog.Get(location.ref) + if err != nil { + return nil, fmt.Errorf("unable to get metadata for path=%q from file catalog: %w", location.RealPath, err) + } + + switch entry.Metadata.TypeFlag { + case tar.TypeSymlink, tar.TypeLink: + // the location we are searching may be a symlink, we should always work with the resolved file + locations, err := r.FilesByPath(location.RealPath) + if err != nil { + return nil, fmt.Errorf("failed to resolve content location at location=%+v: %w", location, err) + } + + switch len(locations) { + case 0: + return nil, fmt.Errorf("link resolution failed while resolving content location: %+v", location) + case 1: + location = locations[0] + default: + return nil, fmt.Errorf("link resolution resulted in multiple results while resolving content location: %+v", location) + } + } + return r.img.FileContentsByRef(location.ref) } diff --git a/syft/source/image_squash_resolver_test.go b/syft/source/image_squash_resolver_test.go index d3de73362bf..7f0819b9ec7 100644 --- a/syft/source/image_squash_resolver_test.go +++ b/syft/source/image_squash_resolver_test.go @@ -1,6 +1,8 @@ package source import ( + "github.com/stretchr/testify/require" + "io" "testing" "github.com/scylladb/go-set/strset" @@ -239,7 +241,7 @@ func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) { { fixtureName: "image-simple", mimeType: "text/plain", - expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt", "/really/nested", "/really"), + expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"), }, } @@ -289,3 +291,210 @@ func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) { } } + +func TestSquashImageResolver_FilesContents(t *testing.T) { + + tests := []struct { + name string + fixture string + contents []string + }{ + { + name: "one degree", + fixture: 
"link-2", + contents: []string{ + "NEW file override!", // always from the squashed perspective + }, + }, + { + name: "two degrees", + fixture: "link-indirect", + contents: []string{ + "NEW file override!", // always from the squashed perspective + }, + }, + { + name: "dead link", + fixture: "link-dead", + contents: []string{}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := newImageSquashResolver(img) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.fixture) + require.NoError(t, err) + assert.Len(t, refs, len(test.contents)) + + for idx, loc := range refs { + + reader, err := resolver.FileContentsByLocation(loc) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + assert.Equal(t, test.contents[idx], string(actual)) + } + }) + } +} +func Test_imageSquashResolver_resolvesLinks(t *testing.T) { + tests := []struct { + name string + runner func(FileResolver) []Location + expected []Location + }{ + { + name: "by mimetype", + runner: func(resolver FileResolver) []Location { + // links should not show up when searching mimetype + actualLocations, err := resolver.FilesByMIMEType("text/plain") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/etc/group", + }, + VirtualPath: "/etc/group", + }, + { + Coordinates: Coordinates{ + RealPath: "/etc/passwd", + }, + VirtualPath: "/etc/passwd", + }, + { + Coordinates: Coordinates{ + RealPath: "/etc/shadow", + }, + VirtualPath: "/etc/shadow", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/file-1.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/file-3.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + { + Coordinates: Coordinates{ + 
RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + }, + }, + { + name: "by glob", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("*ink-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/link-within", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/link-1", + }, + }, + }, + { + name: "by path to degree 1 link", + runner: func(resolver FileResolver) []Location { + // links resolve to the final file + actualLocations, err := resolver.FilesByPath("/link-2") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // we have multiple copies across layers + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + }, + }, + }, + { + name: "by path to degree 2 link", + runner: func(resolver FileResolver) []Location { + // multiple links resolves to the final file + actualLocations, err := resolver.FilesByPath("/link-indirect") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // we have multiple copies across layers + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-indirect", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := newImageSquashResolver(img) + assert.NoError(t, err) + + actualLocations := test.runner(resolver) + require.Len(t, actualLocations, len(test.expected)) + + // some operations on this resolver do not return stable results (order may be different across runs) + + expectedMap := make(map[string]string) + for _, e := range 
test.expected { + expectedMap[e.VirtualPath] = e.RealPath + } + + actualMap := make(map[string]string) + for _, a := range actualLocations { + actualMap[a.VirtualPath] = a.RealPath + } + + assert.Equal(t, expectedMap, actualMap) + }) + } + +} diff --git a/syft/source/location.go b/syft/source/location.go index 3b4c6ff1ea7..17c3e473ce5 100644 --- a/syft/source/location.go +++ b/syft/source/location.go @@ -78,6 +78,20 @@ func NewLocationFromDirectory(responsePath string, ref file.Reference) Location } } +// NewVirtualLocationFromDirectory creates a new Location representing the given path (extracted from the ref) relative to the given directory with a separate virtual access path. +func NewVirtualLocationFromDirectory(responsePath, virtualResponsePath string, ref file.Reference) Location { + if responsePath == virtualResponsePath { + return NewLocationFromDirectory(responsePath, ref) + } + return Location{ + Coordinates: Coordinates{ + RealPath: responsePath, + }, + VirtualPath: virtualResponsePath, + ref: ref, + } +} + func (l Location) String() string { str := "" if l.ref.ID() != 0 { diff --git a/syft/source/source.go b/syft/source/source.go index 1e8551b0810..131bde101f7 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -246,7 +246,7 @@ func (s *Source) FileResolver(scope Scope) (FileResolver, error) { } resolver, err := newDirectoryResolver(s.path, exclusionFunctions...)
if err != nil { - return nil, err + return nil, fmt.Errorf("unable to create directory resolver: %w", err) } s.directoryResolver = resolver } diff --git a/syft/source/source_test.go b/syft/source/source_test.go index f253fed13bd..c15310b55be 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -47,55 +47,60 @@ func TestNewFromImage(t *testing.T) { func TestNewFromDirectory(t *testing.T) { testCases := []struct { - desc string - input string - expString string - inputPaths []string - expRefs int + desc string + input string + expString string + inputPaths []string + expectedRefs int + expectedErr bool }{ { - desc: "no paths exist", - input: "foobar/", - inputPaths: []string{"/opt/", "/other"}, + desc: "no paths exist", + input: "foobar/", + inputPaths: []string{"/opt/", "/other"}, + expectedErr: true, }, { - desc: "path detected", - input: "test-fixtures", - inputPaths: []string{"path-detected/.vimrc"}, - expRefs: 1, + desc: "path detected", + input: "test-fixtures", + inputPaths: []string{"path-detected/.vimrc"}, + expectedRefs: 1, }, { - desc: "directory ignored", - input: "test-fixtures", - inputPaths: []string{"path-detected"}, - expRefs: 0, + desc: "directory ignored", + input: "test-fixtures", + inputPaths: []string{"path-detected"}, + expectedRefs: 0, }, { - desc: "no files-by-path detected", - input: "test-fixtures", - inputPaths: []string{"no-path-detected"}, - expRefs: 0, + desc: "no files-by-path detected", + input: "test-fixtures", + inputPaths: []string{"no-path-detected"}, + expectedRefs: 0, }, } for _, test := range testCases { t.Run(test.desc, func(t *testing.T) { src, err := NewFromDirectory(test.input) + require.NoError(t, err) + assert.Equal(t, test.input, src.Metadata.Path) - if err != nil { - t.Errorf("could not create NewDirScope: %+v", err) - } - if src.Metadata.Path != test.input { - t.Errorf("mismatched stringer: '%s' != '%s'", src.Metadata.Path, test.input) - } resolver, err := src.FileResolver(SquashedScope) - 
assert.NoError(t, err) + if test.expectedErr { + if err == nil { + t.Fatal("expected an error when making the resolver but got none") + } + return + } else { + require.NoError(t, err) + } refs, err := resolver.FilesByPath(test.inputPaths...) if err != nil { t.Errorf("FilesByPath call produced an error: %+v", err) } - if len(refs) != test.expRefs { - t.Errorf("unexpected number of refs returned: %d != %d", len(refs), test.expRefs) + if len(refs) != test.expectedRefs { + t.Errorf("unexpected number of refs returned: %d != %d", len(refs), test.expectedRefs) } diff --git a/syft/source/test-fixtures/image-symlinks/Dockerfile b/syft/source/test-fixtures/image-symlinks/Dockerfile index ba637cd0dde..edeabac9c1d 100644 --- a/syft/source/test-fixtures/image-symlinks/Dockerfile +++ b/syft/source/test-fixtures/image-symlinks/Dockerfile @@ -3,6 +3,7 @@ FROM busybox:latest # LAYER 1: ADD file-1.txt . + # LAYER 2: link with previous data RUN ln -s ./file-1.txt link-1 @@ -25,6 +26,7 @@ RUN ln -s ./i-dont-exist.txt link-dead # LAYER 9: add the parent dir ADD parent /parent + # LAYER 10: parent is a symlink RUN ln -s /parent parent-link diff --git a/syft/source/test-fixtures/symlinked-root/nested/link-root b/syft/source/test-fixtures/symlinked-root/nested/link-root new file mode 120000 index 00000000000..24659224aae --- /dev/null +++ b/syft/source/test-fixtures/symlinked-root/nested/link-root @@ -0,0 +1 @@ +../real-root \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinked-root/real-root/file1.txt b/syft/source/test-fixtures/symlinked-root/real-root/file1.txt new file mode 100644 index 00000000000..5452844a200 --- /dev/null +++ b/syft/source/test-fixtures/symlinked-root/real-root/file1.txt @@ -0,0 +1 @@ +contents! 
diff --git a/syft/source/test-fixtures/symlinked-root/real-root/nested/file2.txt b/syft/source/test-fixtures/symlinked-root/real-root/nested/file2.txt new file mode 100644 index 00000000000..5f7e2f21348 --- /dev/null +++ b/syft/source/test-fixtures/symlinked-root/real-root/nested/file2.txt @@ -0,0 +1 @@ +more contents! diff --git a/syft/source/test-fixtures/symlinked-root/real-root/nested/linked-file1.txt b/syft/source/test-fixtures/symlinked-root/real-root/nested/linked-file1.txt new file mode 120000 index 00000000000..4e7feb2d8cb --- /dev/null +++ b/syft/source/test-fixtures/symlinked-root/real-root/nested/linked-file1.txt @@ -0,0 +1 @@ +../file1.txt \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-roots/outside/link_to_readme b/syft/source/test-fixtures/symlinks-multiple-roots/outside/link_to_readme similarity index 100% rename from syft/source/test-fixtures/symlinks-roots/outside/link_to_readme rename to syft/source/test-fixtures/symlinks-multiple-roots/outside/link_to_readme diff --git a/syft/source/test-fixtures/symlinks-roots/root/link_to_link_to_readme b/syft/source/test-fixtures/symlinks-multiple-roots/root/link_to_link_to_readme similarity index 100% rename from syft/source/test-fixtures/symlinks-roots/root/link_to_link_to_readme rename to syft/source/test-fixtures/symlinks-multiple-roots/root/link_to_link_to_readme diff --git a/syft/source/test-fixtures/symlinks-roots/root/readme b/syft/source/test-fixtures/symlinks-multiple-roots/root/readme similarity index 100% rename from syft/source/test-fixtures/symlinks-roots/root/readme rename to syft/source/test-fixtures/symlinks-multiple-roots/root/readme diff --git a/test/cli/power_user_cmd_test.go b/test/cli/power_user_cmd_test.go index 81a928fcdea..393e28e051d 100644 --- a/test/cli/power_user_cmd_test.go +++ b/test/cli/power_user_cmd_test.go @@ -83,7 +83,7 @@ func TestPowerUserCmdFlags(t *testing.T) { }, }, { - name: "defaut-secrets-dir-results-w-reveal-values", + name: 
"default-secrets-dir-results-w-reveal-values", env: map[string]string{ "SYFT_SECRETS_REVEAL_VALUES": "true", },