From ae5abc26f6b23e87d8074745149b20cadfbe60df Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 23 Feb 2022 15:18:12 -0500 Subject: [PATCH] Allow for CPE strings that can later be sanitized (#844) --- syft/pkg/cpe.go | 40 ++++++++++++++++++--- syft/pkg/cpe_test.go | 82 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 103 insertions(+), 19 deletions(-) diff --git a/syft/pkg/cpe.go b/syft/pkg/cpe.go index db0194d9217c..9c87df709b36 100644 --- a/syft/pkg/cpe.go +++ b/syft/pkg/cpe.go @@ -10,6 +10,10 @@ import ( type CPE = wfn.Attributes +const ( + allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~" +) + // This regex string is taken from // https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd which has the official cpe spec // This first part matches CPE urls and the second part matches binding strings @@ -20,12 +24,35 @@ const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) var cpeRegex = regexp.MustCompile(cpeRegexString) +// NewCPE will parse a formatted CPE string and return a CPE object. Some input, such as the existence of whitespace +// characters is allowed, however, a more strict validation is done after this sanitization process. func NewCPE(cpeStr string) (CPE, error) { + // get a CPE object based on the given string --don't validate yet since it may be possible to escape select cases on the callers behalf + c, err := newCPEWithoutValidation(cpeStr) + if err != nil { + return CPE{}, fmt.Errorf("unable to prase CPE string: %w", err) + } + + // ensure that this CPE can be validated after being fully sanitized + if validateCPEString(CPEString(c)) != nil { + return CPE{}, err + } + + // we don't return the sanitized string, as this is a concern for later when creating CPE strings. In fact, since + // sanitization is lossy (whitespace is replaced, not escaped) it's important that the raw values are left as. + return c, nil +} + +func validateCPEString(cpeStr string) error { // We should filter out all CPEs that do not match the official CPE regex // The facebook nvdtools parser can sometimes incorrectly parse invalid CPE strings if !cpeRegex.MatchString(cpeStr) { - return CPE{}, fmt.Errorf("failed to parse CPE=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString) + return fmt.Errorf("failed to parse CPE=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString) } + return nil +} + +func newCPEWithoutValidation(cpeStr string) (CPE, error) { value, err := wfn.Parse(cpeStr) if err != nil { return CPE{}, fmt.Errorf("failed to parse CPE=%q: %w", cpeStr, err) @@ -60,6 +87,9 @@ func MustCPE(cpeStr string) CPE { } func normalizeCpeField(field string) string { + // replace spaces with underscores (per section 5.3.2 of the CPE spec v 2.3) + field = strings.ReplaceAll(field, " ", "_") + // keep dashes and forward slashes unescaped if field == "*" { return wfn.Any @@ -71,10 +101,9 @@ func normalizeCpeField(field string) string { // It correctly removes slashes that are followed by allowed puncts. // This is to allow for a correct round trip parsing of cpes with quoted characters. func stripSlashes(s string) string { - const allowedPunct = "-!\"#$%&'()+,./:;<=>@[]^`{|}!~" sb := strings.Builder{} for i, c := range s { - if c == '\\' && i+1 < len(s) && strings.ContainsRune(allowedPunct, rune(s[i+1])) { + if c == '\\' && i+1 < len(s) && strings.ContainsRune(allowedCPEPunctuation, rune(s[i+1])) { continue } else { sb.WriteRune(c) @@ -110,12 +139,13 @@ func CPEString(c CPE) string { // end up becoming "prefix" instead causing loss of information and // incorrect CPEs being generated. func sanitize(s string) string { - const allowedPunct = "-!\"#$%&'()+,./:;<=>@[]^`{|}!~" // replace spaces with underscores in := strings.ReplaceAll(s, " ", "_") + + // escape allowable punctuation per section 5.3.2 in the CPE 2.3 spec sb := strings.Builder{} for _, c := range in { - if strings.ContainsRune(allowedPunct, c) { + if strings.ContainsRune(allowedCPEPunctuation, c) { sb.WriteRune('\\') } sb.WriteRune(c) diff --git a/syft/pkg/cpe_test.go b/syft/pkg/cpe_test.go index a74ab05e4e7a..1f610541c477 100644 --- a/syft/pkg/cpe_test.go +++ b/syft/pkg/cpe_test.go @@ -4,9 +4,11 @@ import ( "encoding/json" "fmt" "io/ioutil" + "strings" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func must(c CPE, e error) CPE { @@ -91,10 +93,9 @@ func Test_CPEParser(t *testing.T) { WFN CPE `json:"wfn"` }{} out, err := ioutil.ReadFile("test-fixtures/cpe-data.json") - if err != nil { - t.Fatal("Unable to read test-fixtures/cpe-data.json: ", err) - } - json.Unmarshal(out, &testCases) + require.NoError(t, err) + require.NoError(t, json.Unmarshal(out, &testCases)) + for _, test := range testCases { t.Run(test.CPEString, func(t *testing.T) { c1, err := NewCPE(test.CPEString) @@ -110,19 +111,72 @@ func Test_CPEParser(t *testing.T) { } func Test_InvalidCPE(t *testing.T) { + type testcase struct { + name string + in string + expected string + expectedErr bool + } - testCases := []string{ - "cpe:2.3:a:some-vendor:name:1:3.2:*:*:*:*:*:*:*", - "cpe:2.3:a:some-vendor:name:1^:*:*:*:*:*:*:*", - "cpe:2.3:a:some-vendor:name:**:*:*:*:*:*:*:*", - "cpe:2.3:a:some-vendor:name:*\\:*:*:*:*:*:*:*", + tests := []testcase{ + { + // 5.3.2: The underscore (x5f) MAY be used, and it SHOULD be used in place of whitespace characters (which SHALL NOT be used) + name: "translates spaces", + in: "cpe:2.3:a:some-vendor:name:1 2:*:*:*:*:*:*:*", + expected: "cpe:2.3:a:some-vendor:name:1_2:*:*:*:*:*:*:*", + }, + { + // it isn't easily possible in the string formatted string to detect improper escaping of : (it will fail parsing) + name: "unescaped ':' cannot be helped -- too many fields", + in: "cpe:2.3:a:some-vendor:name:::*:*:*:*:*:*:*", + expectedErr: true, + }, + { + name: "too few fields", + in: "cpe:2.3:a:some-vendor:name:*:*:*:*:*:*:*", + expected: "cpe:2.3:a:some-vendor:name:*:*:*:*:*:*:*:*", + }, + // Note: though the CPE spec does not allow for ? and * as escaped character input, these seem to be allowed in + // the NVD CPE validator for this reason these edge cases were removed } - for _, test := range testCases { - t.Run(test, func(t *testing.T) { - _, err := NewCPE(test) - assert.Error(t, err) - assert.Contains(t, fmt.Sprint(err), "regex") + // the wfn library does not account for escapes of . and - + exceptions := ".-" + // it isn't easily possible in the string formatted string to detect improper escaping of : (it will fail parsing) + skip := ":" + + // make escape exceptions for section 5.3.2 of the CPE spec (2.3) + for _, char := range allowedCPEPunctuation { + if strings.Contains(skip, string(char)) { + continue + } + + in := fmt.Sprintf("cpe:2.3:a:some-vendor:name:*:%s:*:*:*:*:*:*", string(char)) + exp := fmt.Sprintf(`cpe:2.3:a:some-vendor:name:*:\%s:*:*:*:*:*:*`, string(char)) + if strings.Contains(exceptions, string(char)) { + exp = in + } + + tests = append(tests, testcase{ + name: fmt.Sprintf("allowes future escape of character (%s)", string(char)), + in: in, + expected: exp, + expectedErr: false, + }) + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + c, err := NewCPE(test.in) + if test.expectedErr { + assert.Error(t, err) + if t.Failed() { + t.Logf("got CPE: %q details: %+v", CPEString(c), c) + } + return + } + require.NoError(t, err) + assert.Equal(t, test.expected, CPEString(c)) }) } }