Skip to content

Commit

Permalink
libbeat: add support for defining analyzers in-line in fields.yml fil…
Browse files Browse the repository at this point in the history
…es (#28926) (#28981)

(cherry picked from commit 62ec678)

Co-authored-by: Dan Kortschak <[email protected]>
Co-authored-by: Dan Kortschak <[email protected]>
3 people authored Nov 16, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent f3f2b24 commit 633ac3f
Showing 10 changed files with 384 additions and 87 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
@@ -267,6 +267,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Add options to configure k8s client qps/burst. {pull}28151[28151]
- Update to ECS 8.0 fields. {pull}28620[28620]
- Add http.pprof.enabled option to libbeat to allow http/pprof endpoints on the socket that libbeat creates for metrics. {issue}21965[21965]
- Support custom analyzers in fields.yml. {issue}28540[28540] {pull}28926[28926]

*Auditbeat*

40 changes: 40 additions & 0 deletions docs/devguide/fields-yml.asciidoc
Original file line number Diff line number Diff line change
@@ -121,3 +121,43 @@ use in aggregations or ordering, you can use a multi-field mapping:

For more information, see the {ref}/multi-fields.html[{es} documentation about
multi-fields].

==== Defining a text analyzer in-line

It is possible to define a new text analyzer or search analyzer in-line with
the field definition in the field's mapping parameters.

For example, you can define a new text analyzer that does not break hyphenated names:

[source,yaml]
----------------------------------------------------------------------
- key: mybeat
title: mybeat
description: These are the fields used by mybeat.
fields:
- name: last_name
type: text
required: true
description: >
The last name.
analyzer:
mybeat_hyphenated_name: <1>
type: pattern <2>
pattern: "[\\W&&[^-]]+" <3>
search_analyzer:
mybeat_hyphenated_name: <4>
type: pattern
pattern: "[\\W&&[^-]]+"
----------------------------------------------------------------------
<1> Use a newly defined text analyzer
<2> Define the custom analyzer type
<3> Specify the analyzer behaviour
<4> Use the same analyzer for the search

The name of a custom analyzer that is defined in-line may not be reused for a different
analyzer definition. If an analyzer name is reused, the new definition is checked against
the existing definition with that name and must match it. It is recommended that the
analyzer name is prefixed with the beat name to avoid name clashes.

For more information, see {ref}/analysis-custom-analyzer.html[{es} documentation about
defining custom text analyzers].
46 changes: 36 additions & 10 deletions libbeat/mapping/field.go
Original file line number Diff line number Diff line change
@@ -24,13 +24,14 @@ import (
"github.com/joeshaw/multierror"
"github.com/pkg/errors"

"github.com/elastic/beats/v7/libbeat/common"
"github.com/elastic/go-ucfg/yaml"
)

//This reflects allowed attributes for field definitions in the fields.yml.
//No logic is put into this data structure.
//The purpose is to enable using different kinds of transformation, on top of the same data structure.
//Current transformation:
// This reflects allowed attributes for field definitions in the fields.yml.
// No logic is put into this data structure.
// The purpose is to enable using different kinds of transformation, on top of the same data structure.
// Current transformation:
// -ElasticSearch Template
// -Kibana Index Pattern

@@ -44,8 +45,8 @@ type Field struct {
Fields Fields `config:"fields"`
MultiFields Fields `config:"multi_fields"`
Enabled *bool `config:"enabled"`
Analyzer string `config:"analyzer"`
SearchAnalyzer string `config:"search_analyzer"`
Analyzer Analyzer `config:"analyzer"`
SearchAnalyzer Analyzer `config:"search_analyzer"`
Norms bool `config:"norms"`
Dynamic DynamicType `config:"dynamic"`
Index *bool `config:"index"`
@@ -125,6 +126,35 @@ func (d *DynamicType) Unpack(s string) error {
return nil
}

// Analyzer is the value of the "analyzer" and "search_analyzer" settings of a
// field definition. It is either a reference to an analyzer by name, or an
// in-line definition of a named analyzer.
type Analyzer struct {
// Name is the analyzer name: the plain string value of the setting, or the
// single key of the mapping when the analyzer is defined in-line.
Name string
// Definition is the value stored under the analyzer name when the analyzer
// is defined in-line. Unpack does not set it for the plain string form.
Definition interface{}
}

// Unpack populates the Analyzer from a raw configuration value.
//
// Two forms are accepted:
//   - a string, which is stored as the analyzer name;
//   - a mapping (common.MapStr or map[string]interface{}) holding exactly one
//     entry, whose key is stored as the name and whose value is stored as the
//     in-line definition.
//
// Any other value, including a mapping with zero or several entries, is
// rejected with an error.
func (a *Analyzer) Unpack(v interface{}) error {
	// Plain reference to an analyzer by name.
	if name, ok := v.(string); ok {
		a.Name = name
		return nil
	}

	// In-line definition. For unsupported types the mapping stays nil, so the
	// length check below rejects them together with malformed mappings.
	var inline common.MapStr
	switch cfg := v.(type) {
	case common.MapStr:
		inline = cfg
	case map[string]interface{}:
		inline = common.MapStr(cfg)
	}
	if len(inline) != 1 {
		return fmt.Errorf("'%v' is invalid analyzer setting", v)
	}

	// Exactly one entry is present: its key names the analyzer.
	for name, definition := range inline {
		a.Name, a.Definition = name, definition
	}
	return nil
}

// Validate ensures objectTypeParams are not mixed with top level objectType configuration
func (f *Field) Validate() error {
if err := f.validateType(); err != nil {
@@ -264,7 +294,6 @@ func (f Fields) HasKey(key string) bool {
// GetField looks up a field by its dot-separated key. The key is split on "."
// and the resulting path segments are handed to getField.
func (f Fields) GetField(key string) *Field {
	return f.getField(strings.Split(key, "."))
}

// HasNode checks if inside fields the given node exists
@@ -276,7 +305,6 @@ func (f Fields) HasNode(key string) bool {
}

func (f Fields) hasNode(keys []string) bool {

// Nothing to compare, so does not contain it
if len(keys) == 0 {
return false
@@ -286,7 +314,6 @@ func (f Fields) hasNode(keys []string) bool {
keys = keys[1:]

for _, field := range f {

if field.Name == key {

//// It's the last key to compare
@@ -373,7 +400,6 @@ func (f Fields) GetKeys() []string {
}

func (f Fields) getKeys(namespace string) []string {

var keys []string

for _, field := range f {
47 changes: 45 additions & 2 deletions libbeat/mapping/field_test.go
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
package mapping

import (
"fmt"
"strings"
"testing"

@@ -58,7 +59,8 @@ func TestFieldsHasNode(t *testing.T) {
Field{Name: "a", Fields: Fields{
Field{Name: "b", Fields: Fields{
Field{Name: "c"},
}}}},
}},
}},
},
hasNode: true,
},
@@ -68,7 +70,8 @@ func TestFieldsHasNode(t *testing.T) {
Field{Name: "a", Fields: Fields{
Field{Name: "b", Fields: Fields{
Field{Name: "c"},
}}}},
}},
}},
},
hasNode: true,
},
@@ -185,6 +188,46 @@ func TestDynamicYaml(t *testing.T) {
}
}

// TestAnalyzer checks that the "analyzer" setting of a field unpacks correctly
// both when it is a plain analyzer name and when it is an in-line definition
// keyed by the analyzer name.
func TestAnalyzer(t *testing.T) {
	tests := map[string]struct {
		input  []byte
		output Field
		err    error
	}{
		"simple analyzer": {
			input: []byte(`{name: test, analyzer: simple}`),
			output: Field{
				Name:     "test",
				Analyzer: Analyzer{Name: "simple"},
			},
			err: nil,
		},
		"pattern analyzer": {
			input: []byte(`{"name": "test", "analyzer": {"custom": {"type": "pattern", "pattern":"[\\W&&[^-]]+"}}}`),
			output: Field{
				Name:     "test",
				Analyzer: Analyzer{Name: "custom", Definition: map[string]interface{}{"type": "pattern", "pattern": "[\\W\u0026\u0026[^-]]+"}},
			},
			err: nil,
		},
	}

	for name, test := range tests {
		t.Run(name, func(t *testing.T) {
			cfg, err := yaml.NewConfig(test.input)
			if err != nil {
				// Abort the subtest: without a parsed config there is
				// nothing to unpack, and continuing would only obscure the
				// real failure.
				t.Fatalf("unexpected error parsing input for %s: %v", name, err)
			}

			var got Field
			err = cfg.Unpack(&got)

			// Errors are compared by their printed form so that a nil
			// expectation and a nil result compare equal.
			if fmt.Sprint(err) != fmt.Sprint(test.err) {
				t.Fatalf("unexpected error for %s: got:%v want:%v", name, err, test.err)
			}
			assert.Equal(t, test.output.Analyzer, got.Analyzer)
		})
	}
}

func TestGetKeys(t *testing.T) {
tests := []struct {
fields Fields
Loading

0 comments on commit 633ac3f

Please sign in to comment.