Skip to content
This repository has been archived by the owner on May 25, 2022. It is now read-only.

Kv parser #307

Closed
wants to merge 75 commits into from
Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
75 commits
Select commit Hold shift + click to select a range
bbc73f8
contribute key value parser from observiq
Nov 16, 2021
a6b45c2
document delimiter param
Nov 16, 2021
076fafd
make tidy
Nov 16, 2021
b87a269
Merge branch 'main' into kv-parser
Nov 16, 2021
8219978
rebase
Jan 21, 2022
eba64cd
tidy
Jan 21, 2022
22c34e2
Merge branch 'open-telemetry-main' into kv-parser
Jan 21, 2022
e00181d
tidy
Jan 21, 2022
8f00220
do not support bytes
Jan 21, 2022
5ee14c7
make pair delimiter a configuration option, default to whitespace
Jan 22, 2022
580ce6b
simplify tests and test new pair delimiter option
Jan 22, 2022
373286c
remove benchmark, not required
Jan 22, 2022
427d6ce
handle single quoted values
Jan 22, 2022
0857bf7
fail if delimiter and pair delimiter are identical
Jan 22, 2022
2c6ae2f
finish remaining test coverage
Jan 22, 2022
0b0f8bf
Give recombine timeout test a more margin for error (#355)
djaglowski Jan 22, 2022
45c9011
Bump github.com/onsi/gomega from 1.17.0 to 1.18.0 in /internal/tools …
dependabot[bot] Jan 24, 2022
7559b93
Change log level for missing file case (#357)
djaglowski Jan 26, 2022
2a8dbeb
Bump k8s.io/client-go from 0.23.2 to 0.23.3 (#358)
dependabot[bot] Jan 31, 2022
d386a36
Bump github.com/onsi/gomega from 1.18.0 to 1.18.1 in /internal/tools …
dependabot[bot] Jan 31, 2022
4cecafa
Bump go.opentelemetry.io/collector from 0.42.0 to 0.43.1 (#361)
dependabot[bot] Jan 31, 2022
f25b106
Bump github.com/golangci/golangci-lint in /internal/tools (#362)
dependabot[bot] Jan 31, 2022
f6a5ca2
Bump go.opentelemetry.io/collector from 0.43.1 to 0.44.0 (#365)
dependabot[bot] Feb 7, 2022
fcea3af
Remove agent.WithConfigFiles (#374)
djaglowski Feb 10, 2022
3df09ff
file_input should not attempt to track lost files on windows (#366)
djaglowski Feb 10, 2022
1f8bd51
Remove the Dockerfile (#381)
djaglowski Feb 13, 2022
44270c1
Remove version package (#382)
djaglowski Feb 13, 2022
c929bb9
Bump go.uber.org/zap from 1.20.0 to 1.21.0 (#384)
dependabot[bot] Feb 14, 2022
e039b7c
Remove .dockerignore file (#385)
djaglowski Feb 14, 2022
77b222e
Remove plugins (#377)
djaglowski Feb 14, 2022
a9f0162
Syslog input as wrapper (#376)
djaglowski Feb 15, 2022
fd0fe16
Make operator.Builder.Build return one operator (#386)
djaglowski Feb 15, 2022
48d725b
Remove 'stanza_input' operator and related logger wrapper (#389)
djaglowski Feb 16, 2022
43a325c
Remove notion of operator namespaces (#387)
djaglowski Feb 16, 2022
ee764be
Remove BuildContext (#393)
djaglowski Feb 16, 2022
1cb0229
Merge agent package into pipeline package (#395)
djaglowski Feb 18, 2022
a60a77d
Tidy up operator testdata files (#398)
djaglowski Feb 18, 2022
8453fbc
Remove 'builtin' directory layer in operator packages (#400)
djaglowski Feb 18, 2022
bc0b94b
Bump go.opentelemetry.io/collector from 0.44.0 to 0.45.0 (#407)
dependabot[bot] Feb 21, 2022
8cb8712
Update changelog in preparation for release v0.25.0 (#402)
djaglowski Feb 21, 2022
5e83f90
Update dependencies and related tooling (#409)
djaglowski Feb 22, 2022
b3e4572
change attributes (#401)
chaitanyaphalak Feb 22, 2022
78eb238
Remove write_to setting (#412)
djaglowski Feb 23, 2022
c118cd6
change resources (#411)
chaitanyaphalak Feb 23, 2022
312f08c
fix iteration (#413)
rockb1017 Feb 25, 2022
248eda3
Correctly parse timestamps from 1970 (#417)
djaglowski Feb 25, 2022
e99ae1f
Update changelog ahead of v0.26.0 release (#418)
djaglowski Feb 25, 2022
07cef82
Bump github.com/securego/gosec/v2 in /internal/tools (#419)
dependabot[bot] Mar 7, 2022
8bd9504
feat(operator/recombine): do not combine logs before first_entry matc…
sumo-drosiek Mar 8, 2022
6dace5c
Bump go.opentelemetry.io/collector from 0.45.0 to 0.46.0 (#422)
dependabot[bot] Mar 8, 2022
8064c5b
Update dependencies (#426)
djaglowski Mar 9, 2022
b3a8528
Add support for parsing multiline csv records (#425)
djaglowski Mar 10, 2022
13688f4
Update changelog ahead of v0.27.0 release (#427)
djaglowski Mar 10, 2022
48c8d7d
Update CHANGELOG.md (#432)
djaglowski Mar 10, 2022
e92a761
Remove unused tools (#435)
djaglowski Mar 15, 2022
547e79e
Remove notion of a default timestamp (#436)
djaglowski Mar 15, 2022
f054b8f
Bump gonum to resolve iteration issue in Go 1.18 (#437)
djaglowski Mar 16, 2022
3a4af2e
Revert version of syslog-go (#438)
djaglowski Mar 17, 2022
fd465dc
Bump github.com/stretchr/testify from 1.7.0 to 1.7.1 (#444)
dependabot[bot] Mar 21, 2022
a798d75
Bump github.com/golangci/golangci-lint in /internal/tools (#442)
dependabot[bot] Mar 21, 2022
b59ffe9
fix(helpers/multiline): fix force flushing with multiline (#434)
sumo-drosiek Mar 22, 2022
5f623db
Fix typo in recombine operator docs (#452)
djaglowski Mar 23, 2022
ac38b6d
Remove '$' from field syntax (#364)
djaglowski Mar 24, 2022
636ffc5
Update attribute names used by file_input, to match semantic conventi…
djaglowski Mar 24, 2022
5774206
Remove restructure operator (#371)
djaglowski Mar 24, 2022
28dd828
Remove metadata operator (#429)
djaglowski Mar 24, 2022
ecefbe3
Add ObservedTimestamp to entry (#370)
djaglowski Mar 24, 2022
7bf6948
Add prefixes to many examples in documentation (#453)
djaglowski Mar 24, 2022
90e31d9
Provide a dedicated mechanism for parsing logger name (#397)
djaglowski Mar 24, 2022
104d1b9
Enforce maximum SD-NAME length of 32, per RFC5424 (#439)
djaglowski Mar 24, 2022
b3803d7
Update Changelog ahead of major set of breaking changes (#430)
djaglowski Mar 28, 2022
97c3b23
tidy
Nov 16, 2021
7f8d2e7
newline
Mar 28, 2022
3af7827
remove default value for empty string. remove TODO, it is okay to rep…
Mar 28, 2022
585f1a0
doc feadback
Mar 28, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions docs/operators/key_value_parser.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
## `key_value_parser` operator

The `key_value_parser` operator parses the string-type field selected by `parse_from` into key value pairs. All values are of type string.

### Configuration Fields

| Field | Default | Description |
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: To be consistent with other docs, can you add punctuation to the descriptions.

| --- | --- | --- |
| `id` | `key_value_parser` | A unique identifier for the operator |
| `delimiter` | `=` | The delimiter used for splitting a value into a key value pair |
| `pair_delimiter` | | The delimiter used for separating key value pairs, defaults to whitespace |
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries |
| `parse_from` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed into key value pairs |
| `parse_to` | $ | A [field](/docs/types/field.md) that indicates the field the key value pairs will be parsed into |
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The library technically still interprets $ as $body, but we've gotten away from documenting it this way. Better to be clear.

| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md) |
| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](/docs/types/on_error.md) |
| `if` | | An [expression](/docs/types/expression.md) that, when set, will be evaluated to determine whether this operator should be used for the given entry. This allows you to do easy conditional parsing without branching logic with routers. |
| `timestamp` | `nil` | An optional [timestamp](/docs/types/timestamp.md) block which will parse a timestamp field before passing the entry to the output operator |
| `severity` | `nil` | An optional [severity](/docs/types/severity.md) block which will parse a severity field before passing the entry to the output operator |


### Example Configurations

#### Parse the field `message` into key value pairs

Configuration:
```yaml
- type: key_value_parser
  parse_from: message
```

<table>
<tr><td> Input body </td> <td> Output body </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"body": {
"message": "name=stanza"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"body": {
"name": "stanza"
}
}
```

</td>
</tr>
</table>

#### Parse the field `message` into key value pairs, using a non default delimiter

Configuration:
```yaml
- type: key_value_parser
  parse_from: message
  delimiter: ":"
```

<table>
<tr><td> Input body </td> <td> Output body </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"body": {
"message": "name:stanza"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"body": {
"name": "stanza"
}
}
```

</td>
</tr>
</table>

#### Parse the field `message` into key value pairs, using a non default pair delimiter

Configuration:
```yaml
- type: key_value_parser
  parse_from: message
  pair_delimiter: "!"
```

<table>
<tr><td> Input body </td> <td> Output body </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"body": {
"message": "name=stanza ! org=otel ! group=dev"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"body": {
"name": "stanza",
"org": "otel",
"group": "dev"
}
}
```

</td>
</tr>
</table>

#### Parse the field `message` as key value pairs, and parse the timestamp

Configuration:
```yaml
- type: key_value_parser
  parse_from: message
  timestamp:
    parse_from: seconds_since_epoch
    layout_type: epoch
    layout: s
```

<table>
<tr><td> Input body </td> <td> Output body </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"body": {
"message": "name=stanza seconds_since_epoch=1136214245"
}
}
```

</td>
<td>

```json
{
"timestamp": "2006-01-02T15:04:05-07:00",
"body": {
"name": "stanza"
}
}
```

</td>
</tr>
</table>
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.17
require (
github.com/antonmedv/expr v1.9.0
github.com/bmatcuk/doublestar/v3 v3.0.0
github.com/hashicorp/go-multierror v1.1.0
github.com/jpillora/backoff v1.0.0
github.com/json-iterator/go v1.1.12
github.com/mitchellh/mapstructure v1.4.3
Expand Down Expand Up @@ -35,12 +36,13 @@ require (
github.com/google/go-cmp v0.5.6 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/googleapis/gnostic v0.5.5 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/kr/pretty v0.3.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/objx v0.2.0 // indirect
github.com/stretchr/objx v0.1.1 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/multierr v1.7.0 // indirect
golang.org/x/net v0.0.0-20211209124913-491a49abca63 // indirect
Expand Down
5 changes: 3 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:Fecb
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M=
github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
Expand All @@ -298,6 +299,7 @@ github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjh
github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
github.com/hashicorp/go-multierror v1.1.0 h1:B9UzwGQJehnUY1yNrnwREHc3fGbC2xefo8g4TbElacI=
github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
github.com/hashicorp/go-plugin v1.0.1/go.mod h1:++UyYGoz3o5w9ZzAdZxtQKrWWP+iqPBn3cQptSMzBuY=
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
Expand Down Expand Up @@ -516,9 +518,8 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An
github.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
Expand Down
122 changes: 122 additions & 0 deletions operator/builtin/parser/keyvalue/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package keyvalue

import (
"testing"

"github.com/open-telemetry/opentelemetry-log-collection/entry"
"github.com/open-telemetry/opentelemetry-log-collection/operator/helper"
"github.com/open-telemetry/opentelemetry-log-collection/operator/helper/operatortest"
)

func TestKVParserConfig(t *testing.T) {
cases := []operatortest.ConfigUnmarshalTest{
{
Name: "default",
Expect: defaultCfg(),
},
{
Name: "parse_from_simple",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
cfg.ParseFrom = entry.NewBodyField("from")
return cfg
}(),
},
{
Name: "parse_to_simple",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
cfg.ParseTo = entry.NewBodyField("log")
return cfg
}(),
},
{
Name: "on_error_drop",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
cfg.OnError = "drop"
return cfg
}(),
},
{
Name: "timestamp",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
parseField := entry.NewBodyField("timestamp_field")
newTime := helper.TimeParser{
LayoutType: "strptime",
Layout: "%Y-%m-%d",
ParseFrom: &parseField,
}
cfg.TimeParser = &newTime
return cfg
}(),
},
{
Name: "severity",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
parseField := entry.NewBodyField("severity_field")
severityField := helper.NewSeverityParserConfig()
severityField.ParseFrom = &parseField
mapping := map[interface{}]interface{}{
"critical": "5xx",
"error": "4xx",
"info": "3xx",
"debug": "2xx",
}
severityField.Mapping = mapping
cfg.SeverityParserConfig = &severityField
return cfg
}(),
},
{
Name: "preserve_to",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
preserve := entry.NewBodyField("aField")
cfg.PreserveTo = &preserve
return cfg
}(),
},
{
Name: "delimiter",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
cfg.Delimiter = ";"
return cfg
}(),
},
{
Name: "pair_delimiter",
Expect: func() *KVParserConfig {
cfg := defaultCfg()
cfg.PairDelimiter = ";"
return cfg
}(),
},
}

for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
tc.Run(t, defaultCfg())
})
}
}

// defaultCfg returns a new KVParserConfig created by NewKVParserConfig with
// the operator ID "key_value_parser". Every test case starts from this
// config.
func defaultCfg() *KVParserConfig {
	const operatorID = "key_value_parser"
	cfg := NewKVParserConfig(operatorID)
	return cfg
}
Loading