From 4be72fda34ee67d133f51d191f4e3ca9f52e31b7 Mon Sep 17 00:00:00 2001 From: Jonas Xavier Date: Thu, 17 Feb 2022 14:05:11 -0800 Subject: [PATCH] ignore minor parsing error when reading dpkg status files (#786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ignore minor parsing error when reading dpkg status files helps with https://github.com/anchore/syft/issues/733 Question: should we add a smarter parser to guess approximate installed-size value? Signed-off-by: Jonas Galvão Xavier * add datasize lib to help dpkg parsing added unit tests to expand coverage of dpkg parsing Signed-off-by: Jonas Galvão Xavier * drop parse error added unit tests to handleNewKeyValue Signed-off-by: Jonas Galvão Xavier * don't return parsing errors from dpkg Signed-off-by: Jonas Galvão Xavier * go mod tidy Signed-off-by: Jonas Galvão Xavier * test higher level functions Signed-off-by: Jonas Galvão Xavier * return parsing err to let cataloger handle it Signed-off-by: Jonas Galvão Xavier * feedback changes Signed-off-by: Jonas Galvão Xavier * ignore key parsing error log warning with relevant context Signed-off-by: Jonas Galvão Xavier * go mod tidy Signed-off-by: Jonas Galvão Xavier * add context info to log lines simpler error assertion Signed-off-by: Jonas Galvão Xavier * use error.As to assert error in chain Signed-off-by: Jonas Galvão Xavier --- go.mod | 2 +- syft/pkg/cataloger/deb/cataloger.go | 3 +- syft/pkg/cataloger/deb/cataloger_test.go | 1 - syft/pkg/cataloger/deb/parse_dpkg_status.go | 15 +- .../cataloger/deb/parse_dpkg_status_test.go | 161 +++++++++++++++++- .../test-fixtures/status/installed-size-4KB | 31 ++++ 6 files changed, 197 insertions(+), 16 deletions(-) create mode 100644 syft/pkg/cataloger/deb/test-fixtures/status/installed-size-4KB diff --git a/go.mod b/go.mod index 6534436c2b6..b9ac9573ba6 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/gookit/color v1.2.7 github.com/hashicorp/go-multierror v1.1.0 github.com/jinzhu/copier v0.3.2 + github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect github.com/mholt/archiver/v3 v3.5.1 github.com/mitchellh/go-homedir v1.1.0 github.com/mitchellh/hashstructure/v2 v2.0.2 @@ -85,7 +86,6 @@ require ( github.com/mattn/go-colorable v0.0.9 // indirect github.com/mattn/go-isatty v0.0.6 // indirect github.com/mattn/go-runewidth v0.0.7 // indirect - github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect diff --git a/syft/pkg/cataloger/deb/cataloger.go b/syft/pkg/cataloger/deb/cataloger.go index ea431a11b7d..f0c3b3ea85b 100644 --- a/syft/pkg/cataloger/deb/cataloger.go +++ b/syft/pkg/cataloger/deb/cataloger.go @@ -53,7 +53,8 @@ func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []arti pkgs, err := parseDpkgStatus(dbContents) internal.CloseAndLogError(dbContents, dbLocation.VirtualPath) if err != nil { - return nil, nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err) + log.Warnf("dpkg cataloger: unable to catalog package=%+v: %w", dbLocation.RealPath, err) + continue } for i := range pkgs { diff --git a/syft/pkg/cataloger/deb/cataloger_test.go b/syft/pkg/cataloger/deb/cataloger_test.go index 4568c72b97e..72a837f0640 100644 --- a/syft/pkg/cataloger/deb/cataloger_test.go +++ b/syft/pkg/cataloger/deb/cataloger_test.go @@ -12,7 +12,6 @@ import ( ) func TestDpkgCataloger(t *testing.T) { - tests := []struct { name string sources map[string][]string diff --git a/syft/pkg/cataloger/deb/parse_dpkg_status.go b/syft/pkg/cataloger/deb/parse_dpkg_status.go index c2f84c11553..4766a60606e 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_status.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_status.go @@ -6,12 +6,12 @@ import ( "fmt" "io" "regexp" - "strconv" "strings" "github.com/anchore/syft/internal" - + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/pkg" + "github.com/dustin/go-humanize" "github.com/mitchellh/mapstructure" ) @@ -133,7 +133,8 @@ func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) { var val interface{} key, val, err = handleNewKeyValue(line) if err != nil { - return nil, err + log.Warnf("parsing dpkg status: extracting key-value from line: %s err: %v", line, err) + continue } if _, ok := dpkgFields[key]; ok { @@ -154,9 +155,9 @@ func extractSourceVersion(source string) (string, string) { } // handleNewKeyValue parse a new key-value pair from the given unprocessed line -func handleNewKeyValue(line string) (string, interface{}, error) { +func handleNewKeyValue(line string) (key string, val interface{}, err error) { if i := strings.Index(line, ":"); i > 0 { - var key = strings.TrimSpace(line[0:i]) + key = strings.TrimSpace(line[0:i]) // mapstruct cant handle "-" key = strings.ReplaceAll(key, "-", "") val := strings.TrimSpace(line[i+1:]) @@ -164,11 +165,11 @@ func handleNewKeyValue(line string) (string, interface{}, error) { // further processing of values based on the key that was discovered switch key { case "InstalledSize": - numVal, err := strconv.Atoi(val) + s, err := humanize.ParseBytes(val) if err != nil { return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err) } - return key, numVal, nil + return key, int(s), nil default: return key, val, nil } diff --git a/syft/pkg/cataloger/deb/parse_dpkg_status_test.go b/syft/pkg/cataloger/deb/parse_dpkg_status_test.go index f61b23058d6..0611b2c8e91 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_status_test.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_status_test.go @@ -2,10 +2,15 @@ package deb import ( "bufio" + "errors" + "fmt" "os" + "path/filepath" + "strings" "testing" "github.com/anchore/syft/syft/file" + "github.com/stretchr/testify/assert" "github.com/anchore/syft/syft/pkg" "github.com/go-test/deep" @@ -20,11 +25,13 @@ func compareEntries(t *testing.T, left, right pkg.DpkgMetadata) { func TestSinglePackage(t *testing.T) { tests := []struct { - name string - expected pkg.DpkgMetadata + name string + expected pkg.DpkgMetadata + fixturePath string }{ { - name: "Test Single Package", + name: "Test Single Package", + fixturePath: filepath.Join("test-fixtures", "status", "single"), expected: pkg.DpkgMetadata{ Package: "apt", Source: "apt-dev", @@ -68,11 +75,22 @@ func TestSinglePackage(t *testing.T) { }, }, }, - } + { + name: "parse storage notation", + fixturePath: filepath.Join("test-fixtures", "status", "installed-size-4KB"), + expected: pkg.DpkgMetadata{ + Package: "apt", + Source: "apt-dev", + Version: "1.8.2", + Architecture: "amd64", + InstalledSize: 4000, + Maintainer: "APT Development Team ", + }, + }} for _, test := range tests { t.Run(test.name, func(t *testing.T) { - file, err := os.Open("test-fixtures/status/single") + file, err := os.Open(test.fixturePath) if err != nil { t.Fatal("Unable to read test_fixtures/single: ", err) } @@ -204,7 +222,6 @@ func TestMultiplePackages(t *testing.T) { } func TestSourceVersionExtract(t *testing.T) { - tests := []struct { name string input string @@ -242,3 +259,135 @@ func TestSourceVersionExtract(t *testing.T) { }) } } + +func assertAs(expected error) assert.ErrorAssertionFunc { + return func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorAs(t, err, &expected) + } +} + +func Test_parseDpkgStatus(t *testing.T) { + tests := []struct { + name string + input string + want []pkg.Package + wantErr assert.ErrorAssertionFunc + }{ + { + name: "no more packages", + input: `Package: apt`, + wantErr: assert.NoError, + }, + { + name: "duplicated key", + input: `Package: apt +Package: apt-get + +`, + wantErr: assertAs(errors.New("duplicate key discovered: Package")), + }, + { + name: "no match for continuation", + input: ` Package: apt + +`, + wantErr: assertAs(errors.New("no match for continuation: line: ' Package: apt'")), + }, + { + name: "find keys", + input: `Package: apt +Status: install ok installed +Installed-Size: 10kib + +`, + want: []pkg.Package{ + { + Name: "apt", + Type: "deb", + MetadataType: "DpkgMetadata", + Metadata: pkg.DpkgMetadata{ + Package: "apt", + InstalledSize: 10240, + Files: []pkg.DpkgFileRecord{}, + }, + }, + }, + wantErr: assert.NoError, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := bufio.NewReader(strings.NewReader(tt.input)) + got, err := parseDpkgStatus(r) + tt.wantErr(t, err, fmt.Sprintf("parseDpkgStatus")) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_handleNewKeyValue(t *testing.T) { + tests := []struct { + name string + line string + wantKey string + wantVal interface{} + wantErr assert.ErrorAssertionFunc + }{ + { + name: "cannot parse field", + line: "blabla", + wantErr: assertAs(errors.New("cannot parse field from line: 'blabla'")), + }, + { + name: "parse field", + line: "key: val", + wantKey: "key", + wantVal: "val", + wantErr: assert.NoError, + }, + { + name: "parse installed size", + line: "InstalledSize: 128", + wantKey: "InstalledSize", + wantVal: 128, + wantErr: assert.NoError, + }, + { + name: "parse installed kib size", + line: "InstalledSize: 1kib", + wantKey: "InstalledSize", + wantVal: 1024, + wantErr: assert.NoError, + }, + { + name: "parse installed kb size", + line: "InstalledSize: 1kb", + wantKey: "InstalledSize", + wantVal: 1000, + wantErr: assert.NoError, + }, + { + name: "parse installed-size mb", + line: "Installed-Size: 1 mb", + wantKey: "InstalledSize", + wantVal: 1000000, + wantErr: assert.NoError, + }, + { + name: "fail parsing installed-size", + line: "Installed-Size: 1bla", + wantKey: "", + wantErr: assertAs(fmt.Errorf("unhandled size name: %s", "bla")), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotKey, gotVal, err := handleNewKeyValue(tt.line) + tt.wantErr(t, err, fmt.Sprintf("handleNewKeyValue(%v)", tt.line)) + + assert.Equalf(t, tt.wantKey, gotKey, "handleNewKeyValue(%v)", tt.line) + assert.Equalf(t, tt.wantVal, gotVal, "handleNewKeyValue(%v)", tt.line) + }) + } +} diff --git a/syft/pkg/cataloger/deb/test-fixtures/status/installed-size-4KB b/syft/pkg/cataloger/deb/test-fixtures/status/installed-size-4KB new file mode 100644 index 00000000000..5a7cbd19a88 --- /dev/null +++ b/syft/pkg/cataloger/deb/test-fixtures/status/installed-size-4KB @@ -0,0 +1,31 @@ +Package: apt +Status: install ok installed +Priority: required +Section: admin +Installed-Size: 4KB +Maintainer: APT Development Team +Architecture: amd64 +Version: 1.8.2 +Source: apt-dev +Replaces: apt-transport-https (<< 1.5~alpha4~), apt-utils (<< 1.3~exp2~) +Provides: apt-transport-https (= 1.8.2) +Depends: adduser, gpgv | gpgv2 | gpgv1, debian-archive-keyring, libapt-pkg5.0 (>= 1.7.0~alpha3~), libc6 (>= 2.15), libgcc1 (>= 1:3.0), libgnutls30 (>= 3.6.6), libseccomp2 (>= 1.0.1), libstdc++6 (>= 5.2) +Recommends: ca-certificates +Suggests: apt-doc, aptitude | synaptic | wajig, dpkg-dev (>= 1.17.2), gnupg | gnupg2 | gnupg1, powermgmt-base +Breaks: apt-transport-https (<< 1.5~alpha4~), apt-utils (<< 1.3~exp2~), aptitude (<< 0.8.10) +Conffiles: +Description: commandline package manager + This package provides commandline tools for searching and + managing as well as querying information about packages + as a low-level access to all features of the libapt-pkg library. + . + These include: + * apt-get for retrieval of packages and information about them + from authenticated sources and for installation, upgrade and + removal of packages together with their dependencies + * apt-cache for querying available information about installed + as well as installable packages + * apt-cdrom to use removable media as a source for packages + * apt-config as an interface to the configuration settings + * apt-key as an interface to manage authentication keys +