Skip to content

Commit

Permalink
New processor: urldecode (elastic#17505)
Browse files Browse the repository at this point in the history
* add urldecode processor

* update reference yml files

* update doc with PR number

* remove unexpected format

* update from feedback

* move urldecode processor into its own package

* update from feedback
  • Loading branch information
sincejune authored Apr 7, 2020
1 parent c0f1854 commit 96c3018
Show file tree
Hide file tree
Showing 20 changed files with 546 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Update supported versions of `redis` output. {pull}17198[17198]
- Update documentation for system.process.memory fields to include clarification on Windows os's. {pull}17268[17268]
- Add optional regex based cid extractor to `add_kubernetes_metadata` processor. {pull}17360[17360]
- Add `urldecode` processor for decoding URL-encoded fields. {pull}17505[17505]

*Auditbeat*

Expand Down
10 changes: 10 additions & 0 deletions auditbeat/auditbeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,16 @@ auditbeat.modules:
# max_bytes: 1024
# fail_on_error: false
# ignore_missing: true
#
# The following example URL-decodes the value of field1 to field2
#
#processors:
#- urldecode:
# fields:
# - from: "field1"
# to: "field2"
# ignore_missing: false
# fail_on_error: true

#============================= Elastic Cloud ==================================

Expand Down
10 changes: 10 additions & 0 deletions filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1084,6 +1084,16 @@ filebeat.inputs:
# max_bytes: 1024
# fail_on_error: false
# ignore_missing: true
#
# The following example URL-decodes the value of field1 to field2
#
#processors:
#- urldecode:
# fields:
# - from: "field1"
# to: "field2"
# ignore_missing: false
# fail_on_error: true

#============================= Elastic Cloud ==================================

Expand Down
26 changes: 26 additions & 0 deletions filebeat/tests/system/test_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,32 @@ def test_decode_csv_fields_all_options(self):
["42", "hello world", "string\twith tabs and \"broken\" quotes"],
])

def test_urldecode_defaults(self):
    """
    Check URL-decoding using defaults
    """
    log_path = os.path.abspath(self.working_dir) + "/test.log"
    # Only the field mapping is given; ignore_missing and fail_on_error are
    # left unset so the processor's defaults apply.
    urldecode_settings = {
        "fields": [{
            "from": "message",
            "to": "decoded"
        }]
    }
    self.render_config_template(
        path=log_path,
        processors=[{"urldecode": urldecode_settings}]
    )

    # One line that needs no decoding and one with a percent-encoded space.
    input_lines = [
        "correct data\n",
        "correct%20data\n",
    ]
    self._init_and_read_test_input(input_lines)

    # Both inputs are expected to yield the same text in the target field.
    expected_lines = [
        "correct data",
        "correct data",
    ]
    self._assert_expected_lines(expected_lines, field="decoded")

def test_javascript_processor_add_host_metadata(self):
"""
Check JS processor with add_host_metadata
Expand Down
10 changes: 10 additions & 0 deletions heartbeat/heartbeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,16 @@ heartbeat.scheduler:
# max_bytes: 1024
# fail_on_error: false
# ignore_missing: true
#
# The following example URL-decodes the value of field1 to field2
#
#processors:
#- urldecode:
# fields:
# - from: "field1"
# to: "field2"
# ignore_missing: false
# fail_on_error: true

#============================= Elastic Cloud ==================================

Expand Down
10 changes: 10 additions & 0 deletions journalbeat/journalbeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,16 @@ setup.template.settings:
# max_bytes: 1024
# fail_on_error: false
# ignore_missing: true
#
# The following example URL-decodes the value of field1 to field2
#
#processors:
#- urldecode:
# fields:
# - from: "field1"
# to: "field2"
# ignore_missing: false
# fail_on_error: true

#============================= Elastic Cloud ==================================

Expand Down
10 changes: 10 additions & 0 deletions libbeat/_meta/config.reference.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,16 @@
# max_bytes: 1024
# fail_on_error: false
# ignore_missing: true
#
# The following example URL-decodes the value of field1 to field2
#
#processors:
#- urldecode:
# fields:
# - from: "field1"
# to: "field2"
# ignore_missing: false
# fail_on_error: true

#============================= Elastic Cloud ==================================

Expand Down
1 change: 1 addition & 0 deletions libbeat/cmd/instance/imports_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ import (
_ "github.com/elastic/beats/v7/libbeat/processors/fingerprint"
_ "github.com/elastic/beats/v7/libbeat/processors/registered_domain"
_ "github.com/elastic/beats/v7/libbeat/processors/translate_sid"
_ "github.com/elastic/beats/v7/libbeat/processors/urldecode"
_ "github.com/elastic/beats/v7/libbeat/publisher/includes" // Register publisher pipeline modules
)
6 changes: 6 additions & 0 deletions libbeat/docs/processors-list.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ endif::[]
ifndef::no_translate_sid_processor[]
* <<processor-translate-sid, `translate_sid`>>
endif::[]
ifndef::no_urldecode_processor[]
* <<urldecode, `urldecode`>>
endif::[]
//# end::processors-list[]

//# tag::processors-include[]
Expand Down Expand Up @@ -204,5 +207,8 @@ endif::[]
ifndef::no_translate_sid_processor[]
include::{libbeat-processors-dir}/translate_sid/docs/translate_sid.asciidoc[]
endif::[]
ifndef::no_urldecode_processor[]
include::{libbeat-processors-dir}/urldecode/docs/urldecode.asciidoc[]
endif::[]

//# end::processors-include[]
38 changes: 38 additions & 0 deletions libbeat/processors/urldecode/docs/urldecode.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[[urldecode]]
=== URL Decode

++++
<titleabbrev>urldecode</titleabbrev>
++++

The `urldecode` processor specifies a list of fields to decode from URL encoded format. Under the `fields`
key, each entry contains a `from: source-field` and a `to: target-field` pair, where:

* `from` is the source field name
* `to` is the target field name (defaults to the `from` value)

[source,yaml]
-------
processors:
- urldecode:
fields:
- from: "field1"
to: "field2"
ignore_missing: false
fail_on_error: true
-------

In the example above:

- The value of `field1` is URL-decoded and the result is written to `field2`

The `urldecode` processor has the following configuration settings:

`ignore_missing`:: (Optional) If set to true, no error is logged in case a key
which should be URL-decoded is missing. Default is `false`.

`fail_on_error`:: (Optional) If set to true, URL-decoding stops at the first error
and the original event is returned. If set to false, decoding continues with the
remaining fields even if an error occurs. Default is `true`.

See <<conditions>> for a list of supported conditions.
130 changes: 130 additions & 0 deletions libbeat/processors/urldecode/urldecode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package urldecode

import (
"fmt"
"net/url"

"github.com/pkg/errors"

"github.com/elastic/beats/v7/libbeat/beat"
"github.com/elastic/beats/v7/libbeat/common"
"github.com/elastic/beats/v7/libbeat/logp"
"github.com/elastic/beats/v7/libbeat/processors"
"github.com/elastic/beats/v7/libbeat/processors/checks"
jsprocessor "github.com/elastic/beats/v7/libbeat/processors/script/javascript/module/processor"
)

// urlDecode is the processor value returned by New. It carries the unpacked
// processor configuration and the logger used to report decoding failures.
type urlDecode struct {
// config holds the settings unpacked in New.
config urlDecodeConfig
// log is created in New with the logger name "urldecode".
log *logp.Logger
}

// urlDecodeConfig holds the settings unpacked from the processor's
// configuration block.
type urlDecodeConfig struct {
// Fields lists the source/target pairs to decode (config key "fields",
// marked required).
Fields []fromTo `config:"fields" validate:"required"`
// IgnoreMissing, when true, makes decodeField treat a source key that is
// not found as a non-error (config key "ignore_missing").
IgnoreMissing bool `config:"ignore_missing"`
// FailOnError, when true, makes Run stop at the first failing field,
// put back the fields it cloned beforehand and return the error
// (config key "fail_on_error").
FailOnError bool `config:"fail_on_error"`
}

// fromTo names one field to decode and where to store the decoded value.
type fromTo struct {
// From is the key of the field holding the URL-encoded string (required).
From string `config:"from" validate:"required"`
// To is the key the decoded string is written to. When it is empty,
// decodeField writes the result back to From.
To string `config:"to"`
}

func init() {
processors.RegisterPlugin("urldecode",
checks.ConfigChecked(New,
checks.RequireFields("fields"),
checks.AllowedFields("fields", "ignore_missing", "fail_on_error")))
jsprocessor.RegisterPlugin("URLDecode", New)
}

// New builds a urldecode processor from the configuration c.
//
// The configuration is unpacked on top of the defaults ignore_missing=false
// and fail_on_error=true. If unpacking fails, New returns a nil processor and
// an error that wraps the unpack error.
func New(c *common.Config) (processors.Processor, error) {
	config := urlDecodeConfig{
		IgnoreMissing: false,
		FailOnError:   true,
	}

	if err := c.Unpack(&config); err != nil {
		// %w keeps the unpack error in the chain (errors.Is / errors.As)
		// while producing the same message text as before.
		return nil, fmt.Errorf("failed to unpack the configuration of urldecode processor: %w", err)
	}

	return &urlDecode{
		config: config,
		log:    logp.NewLogger("urldecode"),
	}, nil
}

// Run URL-decodes every configured field of event, in configuration order.
//
// Each failure is logged at debug level. With fail_on_error enabled, the first
// failure stops processing: the event's fields are replaced by the clone taken
// before decoding started, the failure text is stored under "error.message",
// and the event is returned together with the decoding error. With
// fail_on_error disabled, the remaining fields are still processed and Run
// returns a nil error.
func (p *urlDecode) Run(event *beat.Event) (*beat.Event, error) {
	var backup common.MapStr
	if p.config.FailOnError {
		// Keep a copy so that fields already decoded can be rolled back.
		backup = event.Fields.Clone()
	}

	for _, field := range p.config.Fields {
		err := p.decodeField(field.From, field.To, event)
		if err == nil {
			continue
		}

		msg := fmt.Sprintf("failed to decode fields in urldecode processor: %v", err)
		p.log.Debug(msg)
		if !p.config.FailOnError {
			continue
		}

		event.Fields = backup
		// Recording the failure on the event is best effort: the decoding
		// error is what the caller gets, but a failed write is still reported
		// instead of being dropped silently.
		if _, putErr := event.PutValue("error.message", msg); putErr != nil {
			p.log.Debugf("failed to set error.message in urldecode processor: %v", putErr)
		}
		return event, err
	}

	return event, nil
}

// decodeField reads the value stored under from, URL-decodes it with
// url.QueryUnescape and stores the result under to. When to is empty the
// result is stored back under from.
//
// It returns nil without touching the event when the lookup fails, the
// ignore_missing option is set and the cause of the failure is
// common.ErrKeyNotFound. Any other lookup failure, a value that is not a
// string, a decoding failure or a failed write is returned as an error.
func (p *urlDecode) decodeField(from string, to string, event *beat.Event) error {
	raw, err := event.GetValue(from)
	if err != nil {
		if p.config.IgnoreMissing {
			if errors.Cause(err) == common.ErrKeyNotFound {
				return nil
			}
		}
		return fmt.Errorf("could not fetch value for key: %s, Error: %v", from, err)
	}

	encoded, isString := raw.(string)
	if !isString {
		return fmt.Errorf("invalid type for `from`, expecting a string received %T", raw)
	}

	decoded, err := url.QueryUnescape(encoded)
	if err != nil {
		return fmt.Errorf("error trying to URL-decode %s: %v", encoded, err)
	}

	// Decode in place unless a separate target key was configured.
	dest := from
	if to != "" {
		dest = to
	}

	_, err = event.PutValue(dest, decoded)
	if err != nil {
		return fmt.Errorf("could not put value: %s: %v, %v", decoded, dest, err)
	}

	return nil
}

// String returns "urldecode=" followed by the configured field pairs rendered
// with the %+v verb.
func (p *urlDecode) String() string {
	return fmt.Sprintf("urldecode=%+v", p.config.Fields)
}
Loading

0 comments on commit 96c3018

Please sign in to comment.