From 79a06f1f1e2406d83eda3fbec05c5bfe13ef7dd3 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Sun, 30 Jan 2022 11:26:43 -0500 Subject: [PATCH] resolve links when fetching file contents Signed-off-by: Alex Goodman --- syft/file/classification_cataloger.go | 3 +- syft/file/classification_cataloger_test.go | 59 +++++++++++++++++++ syft/file/digest_cataloger.go | 20 ++++++- syft/file/digest_cataloger_test.go | 5 +- syft/file/secrets_cataloger.go | 4 ++ .../file/test-fixtures/classifiers/positive/[ | 3 + .../classifiers/positive/busybox | 4 +- .../test-fixtures/image-busybox/Dockerfile | 1 + syft/source/all_layers_resolver.go | 16 +++++ syft/source/all_layers_resolver_test.go | 59 +++++++++++++++++++ syft/source/directory_resolver.go | 22 +++++-- syft/source/directory_resolver_test.go | 40 +++++++++++++ syft/source/image_squash_resolver.go | 24 ++++++++ syft/source/image_squash_resolver_test.go | 55 +++++++++++++++++ .../test-fixtures/image-symlinks/Dockerfile | 2 + 15 files changed, 306 insertions(+), 11 deletions(-) create mode 100644 syft/file/test-fixtures/classifiers/positive/[ mode change 100644 => 120000 syft/file/test-fixtures/classifiers/positive/busybox create mode 100644 syft/file/test-fixtures/image-busybox/Dockerfile diff --git a/syft/file/classification_cataloger.go b/syft/file/classification_cataloger.go index 325db0e12a0..3c23da3702c 100644 --- a/syft/file/classification_cataloger.go +++ b/syft/file/classification_cataloger.go @@ -23,7 +23,8 @@ func (i *ClassificationCataloger) Catalog(resolver source.FileResolver) (map[sou for _, classifier := range i.classifiers { result, err := classifier.Classify(resolver, location) if err != nil { - return nil, err + log.Warnf("file classification cataloger failed with class=%q at location=%+v: %+v", classifier.Class, location, err) + continue } if result != nil { results[location.Coordinates] = append(results[location.Coordinates], *result) diff --git a/syft/file/classification_cataloger_test.go b/syft/file/classification_cataloger_test.go index 365dafd0fc9..388048bc2ee 100644 --- a/syft/file/classification_cataloger_test.go +++ b/syft/file/classification_cataloger_test.go @@ -1,6 +1,7 @@ package file import ( + "github.com/anchore/stereoscope/pkg/imagetest" "testing" "github.com/anchore/syft/syft/source" @@ -134,6 +135,64 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { } } +func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { + tests := []struct { + name string + fixtureImage string + location string + expected []Classification + expectedErr func(assert.TestingT, error, ...interface{}) bool + }{ + { + name: "busybox-regression", + fixtureImage: "image-busybox", + location: "/bin/busybox", + expected: []Classification{ + { + Class: "busybox-binary", + Metadata: map[string]string{ + "version": "1.35.0", + }, + }, + }, + expectedErr: assert.NoError, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + c, err := NewClassificationCataloger(DefaultClassifiers) + test.expectedErr(t, err) + + img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage) + src, err := source.NewFromImage(img, "test-img") + test.expectedErr(t, err) + + resolver, err := src.FileResolver(source.SquashedScope) + test.expectedErr(t, err) + + actualResults, err := c.Catalog(resolver) + test.expectedErr(t, err) + + loc := source.NewLocation(test.location) + + ok := false + for actual_loc, actual_classification := range actualResults { + if loc.RealPath == actual_loc.RealPath { + ok = true + assert.Equal(t, test.expected, actual_classification) + } + } + + if !ok { + t.Fatalf("could not find test location=%q", test.location) + } + + }) + } +} + func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) { c, err := NewClassificationCataloger(DefaultClassifiers) diff --git a/syft/file/digest_cataloger.go b/syft/file/digest_cataloger.go index 502173a23f6..8660410405e 100644 --- a/syft/file/digest_cataloger.go +++ b/syft/file/digest_cataloger.go @@ -2,6 +2,7 @@ package file import ( "crypto" + "errors" "fmt" "hash" "io" @@ -19,6 +20,8 @@ import ( "github.com/anchore/syft/syft/source" ) +var errUndigestableFile = errors.New("undigestable file") + type DigestsCataloger struct { hashes []crypto.Hash } @@ -39,8 +42,13 @@ func (i *DigestsCataloger) Catalog(resolver source.FileResolver) (map[source.Coo for _, location := range locations { stage.Current = location.RealPath result, err := i.catalogLocation(resolver, location) + + if errors.Is(err, errUndigestableFile) { + continue + } + if internal.IsErrPathPermission(err) { - log.Debugf("file digests cataloger skipping - %+v", err) + log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err) continue } @@ -56,6 +64,16 @@ func (i *DigestsCataloger) Catalog(resolver source.FileResolver) (map[source.Coo } func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, location source.Location) ([]Digest, error) { + meta, err := resolver.FileMetadataByLocation(location) + if err != nil { + return nil, err + } + + // we should only attempt to report digests for files that are regular files (don't attempt to resolve links) + if meta.Type != source.RegularFile { + return nil, errUndigestableFile + } + contentReader, err := resolver.FileContentsByLocation(location) if err != nil { return nil, err diff --git a/syft/file/digest_cataloger_test.go b/syft/file/digest_cataloger_test.go index ba06e408549..8ec65699081 100644 --- a/syft/file/digest_cataloger_test.go +++ b/syft/file/digest_cataloger_test.go @@ -65,10 +65,11 @@ func TestDigestsCataloger_SimpleContents(t *testing.T) { expected: testDigests(t, regularFiles, crypto.MD5, crypto.SHA1, crypto.SHA256), }, { - name: "directory returns error", + name: "directory is ignored", digests: []crypto.Hash{crypto.MD5}, files: []string{"test-fixtures/last"}, - catalogErr: true, + expected: make(map[source.Coordinates][]Digest), // empty + catalogErr: false, }, } diff --git a/syft/file/secrets_cataloger.go b/syft/file/secrets_cataloger.go index 37ec20a746c..ed0a4a95db7 100644 --- a/syft/file/secrets_cataloger.go +++ b/syft/file/secrets_cataloger.go @@ -75,6 +75,10 @@ func (i *SecretsCataloger) catalogLocation(resolver source.FileResolver, locatio return nil, err } + if metadata.Size == 0 { + return nil, nil + } + if i.skipFilesAboveSize > 0 && metadata.Size > i.skipFilesAboveSize { return nil, nil } diff --git a/syft/file/test-fixtures/classifiers/positive/[ b/syft/file/test-fixtures/classifiers/positive/[ new file mode 100644 index 00000000000..7829d71b941 --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/[ @@ -0,0 +1,3 @@ +# note: this SHOULD match as busybox 3.33.3 + +noise!BusyBox v3.33.3!noise \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/busybox b/syft/file/test-fixtures/classifiers/positive/busybox deleted file mode 100644 index 7829d71b941..00000000000 --- a/syft/file/test-fixtures/classifiers/positive/busybox +++ /dev/null @@ -1,3 +0,0 @@ -# note: this SHOULD match as busybox 3.33.3 - -noise!BusyBox v3.33.3!noise \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/busybox b/syft/file/test-fixtures/classifiers/positive/busybox new file mode 120000 index 00000000000..c3e3150b864 --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/busybox @@ -0,0 +1 @@ +./[ \ No newline at end of file diff --git a/syft/file/test-fixtures/image-busybox/Dockerfile b/syft/file/test-fixtures/image-busybox/Dockerfile new file mode 100644 index 00000000000..94b54d2f4fe --- /dev/null +++ b/syft/file/test-fixtures/image-busybox/Dockerfile @@ -0,0 +1 @@ +FROM busybox:1.35 \ No newline at end of file diff --git a/syft/source/all_layers_resolver.go b/syft/source/all_layers_resolver.go index 02efd625850..c2766253bc9 100644 --- a/syft/source/all_layers_resolver.go +++ b/syft/source/all_layers_resolver.go @@ -184,6 +184,22 @@ func (r *allLayersResolver) RelativeFileByPath(location Location, path string) * // FileContentsByLocation fetches file contents for a single file reference, irregardless of the source layer. // If the path does not exist an error is returned. func (r *allLayersResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { + entry, err := r.img.FileCatalog.Get(location.ref) + if err != nil { + return nil, fmt.Errorf("unable to get metadata for path=%q from file catalog: %w", location.RealPath, err) + } + + switch entry.Metadata.TypeFlag { + case tar.TypeSymlink, tar.TypeLink: + // the location we are searching may be a symlink, we should always work with the resolved file + newLocation := r.RelativeFileByPath(location, location.VirtualPath) + if newLocation == nil { + // this is a dead link + return nil, fmt.Errorf("no contents for location=%q", location.VirtualPath) + } + location = *newLocation + } + return r.img.FileContentsByRef(location.ref) } diff --git a/syft/source/all_layers_resolver_test.go b/syft/source/all_layers_resolver_test.go index d6c167e5f86..3fe54ccaa0c 100644 --- a/syft/source/all_layers_resolver_test.go +++ b/syft/source/all_layers_resolver_test.go @@ -1,6 +1,8 @@ package source import ( + "github.com/stretchr/testify/require" + "io" "testing" "github.com/stretchr/testify/assert" @@ -301,3 +303,60 @@ func Test_imageAllLayersResolver_hasFilesystemIDInLocation(t *testing.T) { } } + +func TestAllLayersImageResolver_FilesContents(t *testing.T) { + + tests := []struct { + name string + fixture string + contents []string + }{ + { + name: "one degree", + fixture: "link-2", + contents: []string{ + "file 2!", // from the first resolved layer's perspective + "NEW file override!", // from the second resolved layers perspective + }, + }, + { + name: "two degrees", + fixture: "link-indirect", + contents: []string{ + "file 2!", + "NEW file override!", + }, + }, + { + name: "dead link", + fixture: "link-dead", + contents: []string{}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := newAllLayersResolver(img) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.fixture) + require.NoError(t, err) + + // the given path should have an overridden file + require.Len(t, refs, len(test.contents)) + + for idx, loc := range refs { + reader, err := resolver.FileContentsByLocation(loc) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + assert.Equal(t, test.contents[idx], string(actual)) + } + + }) + } +} diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index 6a3fe31e69b..b6c155d222a 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -305,8 +305,15 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) continue } + // we should be resolving symlinks and preserving this information as a VirtualPath to the real file + evaluatedPath, err := filepath.EvalSymlinks(userStrPath) + if err != nil { + log.Warnf("unable to evaluate symlink for path=%q : %+v", userPath, err) + continue + } + // TODO: why not use stored metadata? - fileMeta, err := os.Stat(userStrPath) + fileMeta, err := os.Stat(evaluatedPath) if errors.Is(err, os.ErrNotExist) { // note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform // specific, but essentially hints at the same overall problem (that the path does not exist). Such an @@ -317,7 +324,7 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) // invalid paths. This logging statement is meant to raise IO or permissions related problems. var pathErr *os.PathError if !errors.As(err, &pathErr) { - log.Warnf("path is not valid (%s): %+v", userStrPath, err) + log.Warnf("path is not valid (%s): %+v", evaluatedPath, err) } continue } @@ -329,11 +336,17 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) if runtime.GOOS == WindowsOS { userStrPath = windowsToPosix(userStrPath) + evaluatedPath = windowsToPosix(evaluatedPath) } exists, ref, err := r.fileTree.File(file.Path(userStrPath)) if err == nil && exists { - references = append(references, NewLocationFromDirectory(r.responsePath(userStrPath), *ref)) + loc := NewLocationFromDirectory(r.responsePath(userStrPath), *ref) + if evaluatedPath != userStrPath { + loc.VirtualPath = r.responsePath(evaluatedPath) + } + + references = append(references, loc) } } @@ -404,7 +417,8 @@ func (r *directoryResolver) AllLocations() <-chan Location { results := make(chan Location) go func() { defer close(results) - for _, ref := range r.fileTree.AllFiles() { + // this should be all non-directory types + for _, ref := range r.fileTree.AllFiles(file.TypeReg, file.TypeSymlink, file.TypeHardLink, file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFifo) { results <- NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref) } }() diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index a95634aaf3b..2c4a4aa3aa7 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -4,6 +4,7 @@ package source import ( + "io" "io/fs" "io/ioutil" "os" @@ -259,6 +260,45 @@ func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { assert.Equal(t, "image-symlinks/file-1.txt", refs[0].RealPath) } +func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) { + + tests := []struct { + name string + fixture string + }{ + { + name: "one degree", + fixture: "link_to_new_readme", + }, + { + name: "two degrees", + fixture: "link_to_link_to_new_readme", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.fixture) + require.NoError(t, err) + assert.Len(t, refs, 1) + + reader, err := resolver.FileContentsByLocation(refs[0]) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + expected, err := os.ReadFile("test-fixtures/symlinks-simple/readme") + require.NoError(t, err) + + assert.Equal(t, string(expected), string(actual)) + }) + } +} + func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { // let's make certain that "dev/place" is not ignored, since it is not "/dev/place" resolver, err := newDirectoryResolver("test-fixtures/system_paths/target") diff --git a/syft/source/image_squash_resolver.go b/syft/source/image_squash_resolver.go index 914e546aa63..2e1d0028f10 100644 --- a/syft/source/image_squash_resolver.go +++ b/syft/source/image_squash_resolver.go @@ -1,6 +1,7 @@ package source import ( + "archive/tar" "fmt" "io" @@ -137,6 +138,29 @@ func (r *imageSquashResolver) RelativeFileByPath(_ Location, path string) *Locat // FileContentsByLocation fetches file contents for a single file reference, irregardless of the source layer. // If the path does not exist an error is returned. func (r *imageSquashResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { + entry, err := r.img.FileCatalog.Get(location.ref) + if err != nil { + return nil, fmt.Errorf("unable to get metadata for path=%q from file catalog: %w", location.RealPath, err) + } + + switch entry.Metadata.TypeFlag { + case tar.TypeSymlink, tar.TypeLink: + // the location we are searching may be a symlink, we should always work with the resolved file + locations, err := r.FilesByPath(location.RealPath) + if err != nil { + return nil, fmt.Errorf("failed to resolve content location at location=%+v: %w", location, err) + } + + switch len(locations) { + case 0: + return nil, fmt.Errorf("link resolution failed while resolving content location: %+v", location) + case 1: + location = locations[0] + default: + return nil, fmt.Errorf("link resolution resulted in multiple results while resolving content location: %+v", location) + } + } + return r.img.FileContentsByRef(location.ref) } diff --git a/syft/source/image_squash_resolver_test.go b/syft/source/image_squash_resolver_test.go index 3727431c4e3..8d4b07d65d1 100644 --- a/syft/source/image_squash_resolver_test.go +++ b/syft/source/image_squash_resolver_test.go @@ -1,6 +1,8 @@ package source import ( + "github.com/stretchr/testify/require" + "io" "testing" "github.com/scylladb/go-set/strset" @@ -288,3 +290,56 @@ func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) { } } + +func TestSquashImageResolver_FilesContents(t *testing.T) { + + tests := []struct { + name string + fixture string + contents []string + }{ + { + name: "one degree", + fixture: "link-2", + contents: []string{ + "NEW file override!", // always from the squashed perspective + }, + }, + { + name: "two degrees", + fixture: "link-indirect", + contents: []string{ + "NEW file override!", // always from the squashed perspective + }, + }, + { + name: "dead link", + fixture: "link-dead", + contents: []string{}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := newImageSquashResolver(img) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.fixture) + require.NoError(t, err) + assert.Len(t, refs, len(test.contents)) + + for idx, loc := range refs { + + reader, err := resolver.FileContentsByLocation(loc) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + assert.Equal(t, test.contents[idx], string(actual)) + } + }) + } +} diff --git a/syft/source/test-fixtures/image-symlinks/Dockerfile b/syft/source/test-fixtures/image-symlinks/Dockerfile index ba637cd0dde..edeabac9c1d 100644 --- a/syft/source/test-fixtures/image-symlinks/Dockerfile +++ b/syft/source/test-fixtures/image-symlinks/Dockerfile @@ -3,6 +3,7 @@ FROM busybox:latest # LAYER 1: ADD file-1.txt . + # LAYER 2: link with previous data RUN ln -s ./file-1.txt link-1 @@ -25,6 +26,7 @@ RUN ln -s ./i-dont-exist.txt link-dead # LAYER 9: add the parent dir ADD parent /parent + # LAYER 10: parent is a symlink RUN ln -s /parent parent-link