From ee5d5bd81ddd730c8ea7c536fc591e7fcfc8059a Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 27 Mar 2020 13:43:14 +0100 Subject: [PATCH] Ignore trailing spaces in CEF messages (#17253) This patch updates the ragel state machine to skip trailing spaces at the end of CEF messages. Some CEF exporters, Check Point for example, have been observed to add a trailing space to CEF messages: > "CEF:0:| [...] src=127.0.0.1 " Currently, this space character is interpreted as part of the last field's value, which can cause decoding errors if the value is an integer or an IP address. For maximizing compatibility, we also want to ignore other kinds of space characters (new line, carriage return, tab). For example, we can get a trailing newline when processing CEF messages from UDP input instead of syslog, which removes newlines. Spaces in non-final extensions are preserved, as the CEF standard permits (but discourages) their use in non-final extensions. --- CHANGELOG.next.asciidoc | 1 + x-pack/filebeat/module/cef/log/test/cef.log | 1 + .../module/cef/log/test/cef.log-expected.json | 34 ++++ .../filebeat/processors/decode_cef/cef/cef.go | 5 +- .../filebeat/processors/decode_cef/cef/cef.rl | 8 +- .../processors/decode_cef/cef/cef_test.go | 52 +++++ .../processors/decode_cef/cef/parser.go | 186 +++++++++++------- 7 files changed, 211 insertions(+), 76 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index fa2aedaa3a7..6c968b50b2b 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -100,6 +100,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix default index pattern in IBM MQ filebeat dashboard. {pull}17146[17146] - Fix `elasticsearch.gc` fileset to not collect _all_ logs when Elasticsearch is running in Docker. {issue}13164[13164] {issue}16583[16583] {pull}17164[17164] - Fixed a mapping exception when ingesting CEF logs that used the spriv or dpriv extensions. 
{issue}17216[17216] {pull}17220[17220] +- CEF: Fixed decoding errors caused by trailing spaces in messages. {pull}17253[17253] *Heartbeat* diff --git a/x-pack/filebeat/module/cef/log/test/cef.log b/x-pack/filebeat/module/cef/log/test/cef.log index e9076fb3aad..1e8ab441ff7 100644 --- a/x-pack/filebeat/module/cef/log/test/cef.log +++ b/x-pack/filebeat/module/cef/log/test/cef.log @@ -1,3 +1,4 @@ CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Web request|low|eventId=3457 requestMethod=POST slat=38.915 slong=-77.511 proto=TCP sourceServiceName=httpd requestContext=https://www.google.com src=6.7.8.9 spt=33876 dst=192.168.10.1 dpt=443 request=https://www.example.com/cart CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Authentication|low|eventId=123 src=6.7.8.9 spt=33876 dst=1.2.3.4 dpt=443 duser=alice suser=bob destinationTranslatedAddress=10.10.10.10 fileHash=bc8bbe52f041fd17318f08a0f73762ce oldFileHash=a9796280592f86b74b27e370662d41eb CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Authentication|low|spriv=user dpriv=root +CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Authentication|low|message=This event is padded with whitespace dst=192.168.1.2 src=192.168.3.4 diff --git a/x-pack/filebeat/module/cef/log/test/cef.log-expected.json b/x-pack/filebeat/module/cef/log/test/cef.log-expected.json index 4d5df15433b..99b9348a741 100644 --- a/x-pack/filebeat/module/cef/log/test/cef.log-expected.json +++ b/x-pack/filebeat/module/cef/log/test/cef.log-expected.json @@ -150,5 +150,39 @@ "tags": [ "cef" ] + }, + { + "cef.device.event_class_id": "18", + "cef.device.product": "Vaporware", + "cef.device.vendor": "Elastic", + "cef.device.version": "1.0.0-alpha", + "cef.extensions.destinationAddress": "192.168.1.2", + "cef.extensions.message": "This event is padded with whitespace", + "cef.extensions.sourceAddress": "192.168.3.4", + "cef.name": "Authentication", + "cef.severity": "low", + "cef.version": "0", + "destination.ip": "192.168.1.2", + "event.code": "18", + "event.dataset": "cef.log", + "event.module": "cef", + 
"event.original": "CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Authentication|low|message=This event is padded with whitespace dst=192.168.1.2 src=192.168.3.4 ", + "event.severity": 0, + "fileset.name": "log", + "input.type": "log", + "log.offset": 611, + "message": "This event is padded with whitespace", + "observer.product": "Vaporware", + "observer.vendor": "Elastic", + "observer.version": "1.0.0-alpha", + "related.ip": [ + "192.168.1.2", + "192.168.3.4" + ], + "service.type": "cef", + "source.ip": "192.168.3.4", + "tags": [ + "cef" + ] } ] \ No newline at end of file diff --git a/x-pack/filebeat/processors/decode_cef/cef/cef.go b/x-pack/filebeat/processors/decode_cef/cef/cef.go index 72d94bc969d..e3bc284cd9c 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/cef.go +++ b/x-pack/filebeat/processors/decode_cef/cef/cef.go @@ -14,7 +14,10 @@ import ( // Parser is generated from a ragel state machine using the following command: //go:generate ragel -Z -G1 cef.rl -o parser.go //go:generate goimports -l -w parser.go - +// +// Run go vet and remove any unreachable code in the generated parser.go. +// The go generator outputs duplicated goto statements sometimes. +// // An SVG rendering of the state machine can be viewed by opening cef.svg in // Chrome / Firefox. //go:generate ragel -V -p cef.rl -o cef.dot diff --git a/x-pack/filebeat/processors/decode_cef/cef/cef.rl b/x-pack/filebeat/processors/decode_cef/cef/cef.rl index 3ac5af35a40..a34ced7d87f 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/cef.rl +++ b/x-pack/filebeat/processors/decode_cef/cef/cef.rl @@ -124,13 +124,13 @@ func (e *Event) unpack(data string) error { extension_key_start_chars = alnum | '_'; extension_key_chars = extension_key_start_chars | '.' 
| ',' | '[' | ']'; extension_key_pattern = extension_key_start_chars extension_key_chars*; - extension_value_chars = backslash | escape_equal | (any -- equal -- escape); + extension_value_chars_nospace = backslash | escape_equal | (any -- equal -- escape -- space); # Extension fields. extension_key = extension_key_pattern >mark %extension_key; - extension_value = (extension_value_chars @extension_value_mark)* >extension_value_start $err(extension_err); - extension = extension_key equal extension_value %/extension_eof; - extensions = " "* extension (" " extension)*; + extension_value = (space* extension_value_chars_nospace @extension_value_mark)* >extension_value_start $err(extension_err); + extension = extension_key equal extension_value; + extensions = " "* extension (space* " " extension)* space* %/extension_eof; # gobble_extension attempts recovery from a malformed value by trying to # advance to the next extension key and re-entering the main state machine. diff --git a/x-pack/filebeat/processors/decode_cef/cef/cef_test.go b/x-pack/filebeat/processors/decode_cef/cef/cef_test.go index 361b105cd52..fb67afbe657 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/cef_test.go +++ b/x-pack/filebeat/processors/decode_cef/cef/cef_test.go @@ -44,6 +44,14 @@ const ( malformedExtensionEscape = `CEF:0|FooBar|Web Gateway|1.2.3.45.67|200|Success|2|rt=Sep 07 2018 14:50:39 cat=Access Log dst=1.1.1.1 dhost=foo.example.com suser=redacted src=2.2.2.2 requestMethod=POST request='https://foo.example.com/bar/bingo/1' requestClientApplication='Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test=' cs1= cs1Label=Foo Bar` multipleMalformedExtensionValues = `CEF:0|vendor|product|version|event_id|name|Very-High| msg=Hello World error=Failed because id==old_id user=root angle=106.7<=180` + + paddedMessage = `CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 msg=Trailing space in non-final extensions is preserved src=10.0.0.192 ` + + crlfMessage = 
"CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 msg=Trailing space in final extensions is not preserved\t \r\n" + + tabMessage = "CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 msg=Tabs\tand\rcontrol\ncharacters are preserved\t src=127.0.0.1" + + tabNoSepMessage = "CEF:0|security|threatmanager|1.0|100|message has tabs|10|spt=1232 msg=Tab is not a separator\tsrc=127.0.0.1" ) var testMessages = []string{ @@ -60,6 +68,9 @@ var testMessages = []string{ escapesInExtension, malformedExtensionEscape, multipleMalformedExtensionValues, + paddedMessage, + crlfMessage, + tabMessage, } func TestGenerateFuzzCorpus(t *testing.T) { @@ -322,6 +333,47 @@ func TestEventUnpack(t *testing.T) { err := e.Unpack("CEF:0|||||||a=") assert.NoError(t, err) }) + + t.Run("padded", func(t *testing.T) { + var e Event + err := e.Unpack(paddedMessage) + assert.NoError(t, err) + assert.Equal(t, map[string]*Field{ + "src": IPField("10.0.0.192"), + "spt": IntegerField(1232), + "msg": StringField("Trailing space in non-final extensions is preserved "), + }, e.Extensions) + }) + + t.Run("padded with extra whitespace chars", func(t *testing.T) { + var e Event + err := e.Unpack(crlfMessage) + assert.NoError(t, err) + assert.Equal(t, map[string]*Field{ + "spt": IntegerField(1232), + "msg": StringField("Trailing space in final extensions is not preserved"), + }, e.Extensions) + }) + + t.Run("internal whitespace chars", func(t *testing.T) { + var e Event + err := e.Unpack(tabMessage) + assert.NoError(t, err) + assert.Equal(t, map[string]*Field{ + "spt": IntegerField(1232), + "src": IPField("127.0.0.1"), + "msg": StringField("Tabs\tand\rcontrol\ncharacters are preserved\t"), + }, e.Extensions) + }) + + t.Run("No tab as separator", func(t *testing.T) { + var e Event + err := e.Unpack(tabNoSepMessage) + assert.Error(t, err) + assert.Equal(t, map[string]*Field{ + "spt": IntegerField(1232), + }, e.Extensions) + }) } func TestEventUnpackWithFullExtensionNames(t *testing.T) 
{ diff --git a/x-pack/filebeat/processors/decode_cef/cef/parser.go b/x-pack/filebeat/processors/decode_cef/cef/parser.go index 2ddcb942401..cd765873527 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/parser.go +++ b/x-pack/filebeat/processors/decode_cef/cef/parser.go @@ -329,145 +329,177 @@ func (e *Event) unpack(data string) error { case 32: switch data[(p)] { case 32: - goto tr54 + goto tr55 case 61: goto tr46 case 92: - goto tr55 + goto tr56 + } + if 9 <= data[(p)] && data[(p)] <= 13 { + goto tr54 } goto tr53 case 33: switch data[(p)] { case 32: - goto tr56 + goto tr58 case 61: goto tr46 case 92: + goto tr59 + } + if 9 <= data[(p)] && data[(p)] <= 13 { goto tr57 } goto tr48 case 34: switch data[(p)] { case 32: - goto tr56 + goto tr58 case 61: goto tr46 case 92: - goto tr57 + goto tr59 case 95: - goto tr58 + goto tr60 } switch { - case data[(p)] < 65: - if 48 <= data[(p)] && data[(p)] <= 57 { - goto tr58 + case data[(p)] < 48: + if 9 <= data[(p)] && data[(p)] <= 13 { + goto tr57 } - case data[(p)] > 90: - if 97 <= data[(p)] && data[(p)] <= 122 { - goto tr58 + case data[(p)] > 57: + switch { + case data[(p)] > 90: + if 97 <= data[(p)] && data[(p)] <= 122 { + goto tr60 + } + case data[(p)] >= 65: + goto tr60 } default: - goto tr58 + goto tr60 } goto tr48 case 35: switch data[(p)] { case 32: - goto tr56 + goto tr58 case 44: - goto tr59 + goto tr61 case 46: - goto tr59 + goto tr61 case 61: - goto tr60 + goto tr62 case 92: - goto tr57 - case 95: goto tr59 + case 95: + goto tr61 } switch { - case data[(p)] < 65: - if 48 <= data[(p)] && data[(p)] <= 57 { - goto tr59 + case data[(p)] < 48: + if 9 <= data[(p)] && data[(p)] <= 13 { + goto tr57 } - case data[(p)] > 93: - if 97 <= data[(p)] && data[(p)] <= 122 { - goto tr59 + case data[(p)] > 57: + switch { + case data[(p)] > 93: + if 97 <= data[(p)] && data[(p)] <= 122 { + goto tr61 + } + case data[(p)] >= 65: + goto tr61 } default: - goto tr59 + goto tr61 } goto tr48 case 36: switch data[(p)] { case 32: - goto tr62 
+ goto tr65 case 61: goto tr46 case 92: - goto tr63 + goto tr66 } - goto tr61 + if 9 <= data[(p)] && data[(p)] <= 13 { + goto tr64 + } + goto tr63 case 37: switch data[(p)] { case 32: - goto tr64 + goto tr68 case 61: goto tr46 case 92: - goto tr65 + goto tr69 + } + if 9 <= data[(p)] && data[(p)] <= 13 { + goto tr67 } goto tr47 case 38: switch data[(p)] { case 32: - goto tr64 + goto tr68 case 61: goto tr46 case 92: - goto tr65 + goto tr69 case 95: - goto tr66 + goto tr70 } switch { - case data[(p)] < 65: - if 48 <= data[(p)] && data[(p)] <= 57 { - goto tr66 + case data[(p)] < 48: + if 9 <= data[(p)] && data[(p)] <= 13 { + goto tr67 } - case data[(p)] > 90: - if 97 <= data[(p)] && data[(p)] <= 122 { - goto tr66 + case data[(p)] > 57: + switch { + case data[(p)] > 90: + if 97 <= data[(p)] && data[(p)] <= 122 { + goto tr70 + } + case data[(p)] >= 65: + goto tr70 } default: - goto tr66 + goto tr70 } goto tr47 case 39: switch data[(p)] { case 32: - goto tr64 + goto tr68 case 44: - goto tr67 + goto tr71 case 46: - goto tr67 + goto tr71 case 61: - goto tr60 + goto tr62 case 92: - goto tr65 + goto tr69 case 95: - goto tr67 + goto tr71 } switch { - case data[(p)] < 65: - if 48 <= data[(p)] && data[(p)] <= 57 { + case data[(p)] < 48: + if 9 <= data[(p)] && data[(p)] <= 13 { goto tr67 } - case data[(p)] > 93: - if 97 <= data[(p)] && data[(p)] <= 122 { - goto tr67 + case data[(p)] > 57: + switch { + case data[(p)] > 93: + if 97 <= data[(p)] && data[(p)] <= 122 { + goto tr71 + } + case data[(p)] >= 65: + goto tr71 } default: - goto tr67 + goto tr71 } goto tr47 case 26: @@ -678,16 +710,16 @@ func (e *Event) unpack(data string) error { tr43: cs = 25 goto f0 - tr65: + tr69: cs = 26 goto _again - tr63: + tr66: cs = 26 goto f20 - tr57: + tr59: cs = 27 goto _again - tr55: + tr56: cs = 27 goto f20 tr49: @@ -708,43 +740,55 @@ func (e *Event) unpack(data string) error { tr45: cs = 32 goto f14 + tr57: + cs = 33 + goto _again tr48: cs = 33 goto f16 tr53: cs = 33 goto f19 - tr56: - cs = 34 
- goto f16 tr54: + cs = 33 + goto f20 + tr58: cs = 34 - goto f19 - tr59: + goto _again + tr55: + cs = 34 + goto f20 + tr61: cs = 35 goto f16 - tr58: + tr60: cs = 35 goto f22 - tr60: + tr62: cs = 36 goto f14 + tr67: + cs = 37 + goto _again tr47: cs = 37 goto f16 - tr61: + tr63: cs = 37 goto f19 tr64: + cs = 37 + goto f20 + tr68: cs = 38 - goto f16 - tr62: + goto _again + tr65: cs = 38 - goto f19 - tr67: + goto f20 + tr71: cs = 39 goto f16 - tr66: + tr70: cs = 39 goto f23 tr52: @@ -973,7 +1017,7 @@ func (e *Event) unpack(data string) error { extKey, extValueStart, extValueEnd = "", 0, 0 } -//line parser.go:847 +//line parser.go:883 } }