Skip to content

Commit

Permalink
Add support for multifields added by ecs@mappings (#2035)
Browse files Browse the repository at this point in the history
`ecs@mappings` component template adds multifields that are not
defined in ECS or in packages using these mappings. These fields
are present in the ingested documents and cannot be validated by
elastic-package.

Add the definitions for these multifields when `ecs@mappings` is
used and no other definition is present.

Also, improve the error reported when there is no definition for a
possible multifield.
  • Loading branch information
jsoriano authored Aug 27, 2024
1 parent 45fbd0c commit 296eaa9
Show file tree
Hide file tree
Showing 50 changed files with 22,300 additions and 51 deletions.
168 changes: 144 additions & 24 deletions internal/fields/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,15 @@ func initDependencyManagement(packageRoot string, specVersion semver.Version, im
return nil, nil, err
}
logger.Debugf("Imported ECS fields definition from external schema for validation (embedded in package: %v, stack uses ecs@mappings template: %v)", packageEmbedsEcsMappings, stackSupportsEcsMapping)

schema = ecsSchema
}

// ecs@mappings adds additional multifields that are not defined anywhere.
// Adding them in all cases so packages can be tested in versions of the stack that
// add the ecs@mappings component template.
schema = appendECSMappingMultifields(schema, "")

return fdm, schema, nil
}

Expand Down Expand Up @@ -383,6 +389,86 @@ func allVersionsIncludeECS(kibanaConstraints *semver.Constraints) bool {
// return !kibanaConstraints.Check(lastStackVersionWithoutEcsMappings)
}

// ecsPathWithMultifieldsMatch reports whether name ends with one of the path
// suffixes listed below, which were taken from the ecs@mappings component
// template. The comparison is a plain suffix match, so entries without a
// leading dot (for example "command_line") also match names where the suffix
// is only the tail of a longer path segment.
func ecsPathWithMultifieldsMatch(name string) bool {
	// Suffixes are grouped by the location in ecs@mappings.json they come from.
	groups := [][]string{
		// From https://github.com/elastic/elasticsearch/blob/34a78f3cf3e91cd13f51f1f4f8e378f8ed244a2b/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json#L87
		{
			".body.content",
			"url.full",
			"url.original",
		},

		// From https://github.com/elastic/elasticsearch/blob/34a78f3cf3e91cd13f51f1f4f8e378f8ed244a2b/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json#L96
		{
			"command_line",
			"stack_trace",
		},

		// From https://github.com/elastic/elasticsearch/blob/34a78f3cf3e91cd13f51f1f4f8e378f8ed244a2b/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json#L113
		{
			".title",
			".executable",
			".name",
			".working_directory",
			".full_name",
			"file.path",
			"file.target_path",
			"os.full",
			"email.subject",
			"vulnerability.description",
			"user_agent.original",
		},
	}

	for _, group := range groups {
		for _, ending := range group {
			if strings.HasSuffix(name, ending) {
				return true
			}
		}
	}
	return false
}

// appendECSMappingMultifields returns a copy of schema in which every field whose full dotted
// name is matched by ecsPathWithMultifieldsMatch gets the multifields included in ecs@mappings
// that are not defined anywhere else. A multifield is only added when the field does not
// already define a multifield with the same name, so explicit definitions take precedence.
//
// Nested definitions in Fields are processed recursively. prefix is the full name of the
// parent field, or the empty string for definitions at the root level.
//
// The definitions in schema are not modified. The result is nil when schema is empty.
func appendECSMappingMultifields(schema []FieldDefinition, prefix string) []FieldDefinition {
	// Each rule pairs a name matcher with the multifields that must exist on matching fields.
	rules := []struct {
		match       func(name string) bool
		definitions []FieldDefinition
	}{
		{
			match: ecsPathWithMultifieldsMatch,
			definitions: []FieldDefinition{
				{
					Name: "text",
					Type: "match_only_text",
				},
			},
		},
	}

	var result []FieldDefinition
	for _, def := range schema {
		fullName := def.Name
		if prefix != "" {
			fullName = prefix + "." + fullName
		}
		def.Fields = appendECSMappingMultifields(def.Fields, fullName)

		for _, rule := range rules {
			if !rule.match(fullName) {
				continue
			}
			for _, mf := range rule.definitions {
				// Append multifields only if they are not already defined.
				alreadyDefined := slices.ContainsFunc(def.MultiFields, func(d FieldDefinition) bool {
					return d.Name == mf.Name
				})
				if alreadyDefined {
					continue
				}
				// def is a copy of the element, but its MultiFields slice still shares the
				// backing array with the definition in schema. Clip it so append always
				// allocates instead of writing into spare capacity owned by the caller.
				def.MultiFields = append(slices.Clip(def.MultiFields), mf)
			}
		}

		result = append(result, def)
	}
	return result
}

//go:embed _static/allowed_geo_ips.txt
var allowedGeoIPs string

Expand Down Expand Up @@ -546,14 +632,14 @@ func (v *Validator) validateMapElement(root string, elem common.MapStr, doc comm
key := strings.TrimLeft(root+"."+name, ".")

switch val := val.(type) {
case []map[string]interface{}:
case []map[string]any:
for _, m := range val {
err := v.validateMapElement(key, m, doc)
if err != nil {
errs = append(errs, err...)
}
}
case map[string]interface{}:
case map[string]any:
if isFieldTypeFlattened(key, v.Schema) {
// Do not traverse into objects with flattened data types
// because the entire object is mapped as a single field.
Expand All @@ -573,22 +659,22 @@ func (v *Validator) validateMapElement(root string, elem common.MapStr, doc comm
return errs
}

func (v *Validator) validateScalarElement(key string, val interface{}, doc common.MapStr) error {
func (v *Validator) validateScalarElement(key string, val any, doc common.MapStr) error {
if key == "" {
return nil // root key is always valid
}

definition := FindElementDefinition(key, v.Schema)
if definition == nil && skipValidationForField(key) {
return nil // generic field, let's skip validation for now
}

if definition == nil {
switch val.(type) {
case []any, []map[string]interface{}:
return fmt.Errorf(`field "%s" is used as array of objects, expected explicit definition with type group or nested`, key)
switch {
case skipValidationForField(key):
return nil // generic field, let's skip validation for now
case isArrayOfObjects(val):
return fmt.Errorf(`field %q is used as array of objects, expected explicit definition with type group or nested`, key)
case couldBeMultifield(key, v.Schema):
return fmt.Errorf(`field %q is undefined, could be a multifield`, key)
default:
return fmt.Errorf(`field "%s" is undefined`, key)
return fmt.Errorf(`field %q is undefined`, key)
}
}

Expand Down Expand Up @@ -629,7 +715,7 @@ func (v *Validator) SanitizeSyntheticSourceDocs(docs []common.MapStr) ([]common.
// in case it is not specified any normalization and that field is an array of
// just one element, the field is going to be updated to remove the array and keep
// that element as a value.
vals, ok := contents.([]interface{})
vals, ok := contents.([]any)
if !ok {
continue
}
Expand Down Expand Up @@ -685,7 +771,7 @@ func createDocExpandingObjects(doc common.MapStr) (common.MapStr, error) {

// Possible errors found but not limited to those
// - expected map but type is string
// - expected map but type is []interface{}
// - expected map but type is []any
if strings.HasPrefix(err.Error(), "expected map but type is") {
logger.Debugf("not able to add key %s, is this a multifield?: %s", k, err)
continue
Expand Down Expand Up @@ -752,6 +838,40 @@ func isFieldTypeFlattened(key string, fieldDefinitions []FieldDefinition) bool {
return definition != nil && definition.Type == "flattened"
}

// couldBeMultifield reports whether key might be a multifield of its parent field.
// The parent is the part of key before the last dot; it has to be defined in
// fieldDefinitions with a type that is not an object-like one.
func couldBeMultifield(key string, fieldDefinitions []FieldDefinition) bool {
	sep := strings.LastIndex(key, ".")
	if sep < 0 {
		// A key without dots is at the root level, it has no parent to be a multifield of.
		return false
	}

	parent := FindElementDefinition(key[:sep], fieldDefinitions)
	if parent == nil {
		// Without a definition for the parent there is nothing to base the guess on.
		return false
	}

	// Object-like parents hold subfields, not multifields.
	objectLike := parent.Type == "" ||
		parent.Type == "group" ||
		parent.Type == "nested" ||
		parent.Type == "group-nested" ||
		parent.Type == "object"
	return !objectLike
}

// isArrayOfObjects reports whether val is a slice of maps, or a generic slice
// that contains at least one map among its direct elements. Nested slices are
// not inspected.
func isArrayOfObjects(val any) bool {
	if _, ok := val.([]map[string]any); ok {
		return true
	}

	items, ok := val.([]any)
	if !ok {
		return false
	}
	for i := range items {
		if _, isMap := items[i].(map[string]any); isMap {
			return true
		}
	}
	return false
}

func findElementDefinitionForRoot(root, searchedKey string, FieldDefinitions []FieldDefinition) *FieldDefinition {
for _, def := range FieldDefinitions {
key := strings.TrimLeft(root+"."+def.Name, ".")
Expand Down Expand Up @@ -825,15 +945,15 @@ func compareKeys(key string, def FieldDefinition, searchedKey string) bool {
return false
}

func (v *Validator) validateExpectedNormalization(definition FieldDefinition, val interface{}) error {
func (v *Validator) validateExpectedNormalization(definition FieldDefinition, val any) error {
// Validate expected normalization starting with packages following spec v2 format.
if v.specVersion.LessThan(semver2_0_0) {
return nil
}
for _, normalize := range definition.Normalize {
switch normalize {
case "array":
if _, isArray := val.([]interface{}); val != nil && !isArray {
if _, isArray := val.([]any); val != nil && !isArray {
return fmt.Errorf("expected array, found %q (%T)", val, val)
}
}
Expand Down Expand Up @@ -876,7 +996,7 @@ func validSubField(def FieldDefinition, extraPart string) bool {

// parseElementValue checks that the value stored in a field matches the field definition. For
// arrays it checks it for each Element.
func (v *Validator) parseElementValue(key string, definition FieldDefinition, val interface{}, doc common.MapStr) error {
func (v *Validator) parseElementValue(key string, definition FieldDefinition, val any, doc common.MapStr) error {
err := v.parseAllElementValues(key, definition, val, doc)
if err != nil {
return err
Expand All @@ -887,7 +1007,7 @@ func (v *Validator) parseElementValue(key string, definition FieldDefinition, va

// parseAllElementValues performs validations that must be done for all elements at once in
// case that there are multiple values.
func (v *Validator) parseAllElementValues(key string, definition FieldDefinition, val interface{}, doc common.MapStr) error {
func (v *Validator) parseAllElementValues(key string, definition FieldDefinition, val any, doc common.MapStr) error {
switch definition.Type {
case "constant_keyword", "keyword", "text":
if !v.specVersion.LessThan(semver2_0_0) {
Expand All @@ -904,7 +1024,7 @@ func (v *Validator) parseAllElementValues(key string, definition FieldDefinition
}

// parseSingleElementValue performs validations on individual values of each element.
func (v *Validator) parseSingleElementValue(key string, definition FieldDefinition, val interface{}, doc common.MapStr) error {
func (v *Validator) parseSingleElementValue(key string, definition FieldDefinition, val any, doc common.MapStr) error {
invalidTypeError := func() error {
return fmt.Errorf("field %q's Go type, %T, does not match the expected field type: %s (field value: %v)", key, val, definition.Type, val)
}
Expand Down Expand Up @@ -978,7 +1098,7 @@ func (v *Validator) parseSingleElementValue(key string, definition FieldDefiniti
// Groups should only contain nested fields, not single values.
case "group", "nested":
switch val := val.(type) {
case map[string]interface{}:
case map[string]any:
// This is probably an element from an array of objects,
// even if not recommended, it should be validated.
if v.specVersion.LessThan(semver3_0_1) {
Expand All @@ -989,7 +1109,7 @@ func (v *Validator) parseSingleElementValue(key string, definition FieldDefiniti
return nil
}
return errs
case []interface{}:
case []any:
// This can be an array of arrays of objects. Elasticsearch will probably
// flatten this. So even if this is quite unexpected, let's try to handle it.
if v.specVersion.LessThan(semver3_0_1) {
Expand Down Expand Up @@ -1048,8 +1168,8 @@ func (v *Validator) isAllowedIPValue(s string) bool {

// forEachElementValue visits a function for each element in the given value if
// it is an array. If it is not an array, it calls the function with it.
func forEachElementValue(key string, definition FieldDefinition, val interface{}, doc common.MapStr, fn func(string, FieldDefinition, interface{}, common.MapStr) error) error {
arr, isArray := val.([]interface{})
func forEachElementValue(key string, definition FieldDefinition, val any, doc common.MapStr, fn func(string, FieldDefinition, any, common.MapStr) error) error {
arr, isArray := val.([]any)
if !isArray {
return fn(key, definition, val, doc)
}
Expand Down Expand Up @@ -1128,13 +1248,13 @@ func ensureExpectedEventType(key string, values []string, definition FieldDefini
return nil
}

func valueToStringsSlice(value interface{}) ([]string, error) {
func valueToStringsSlice(value any) ([]string, error) {
switch v := value.(type) {
case nil:
return nil, nil
case string:
return []string{v}, nil
case []interface{}:
case []any:
var values []string
for _, e := range v {
s, ok := e.(string)
Expand Down
Loading

0 comments on commit 296eaa9

Please sign in to comment.