From 8e9a2761d765bb3e106b897bb19bd6624974b723 Mon Sep 17 00:00:00 2001 From: Taylor Swanson <90622908+taylor-swanson@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:32:47 -0500 Subject: [PATCH] [libbeat] Fix parsing of RFC 3164 process IDs in syslog processor (#38982) - The pattern for parsing process IDs was too relaxed and would match everything between the first opening and the last closing square bracket in a message. If the message included multiple closing square brackets, the process ID would be set to not only the process ID, but also whatever leads up to the last closing square bracket. - The pattern has now been locked down to only digits. - Added test case. --- CHANGELOG.next.asciidoc | 1 + libbeat/reader/syslog/parser/rfc3164.rl | 2 +- libbeat/reader/syslog/rfc3164_gen.go | 60 +++---------------------- libbeat/reader/syslog/rfc3164_test.go | 13 ++++++ 4 files changed, 20 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index c9d0f8127736..c28b56a184ee 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -91,6 +91,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Change cache processor documentation from `write_period` to `write_interval`. {pull}38561[38561] - Fix cache processor expiries heap cleanup on partial file writes. {pull}38561[38561] - Fix cache processor expiries infinite growth when large a large TTL is used and recurring keys are cached. {pull}38561[38561] +- Fix parsing of RFC 3164 process IDs in syslog processor. {issue}38947[38947] {pull}38982[38982] *Auditbeat* - Set field types to correctly match ECS in sessionmd processor {issue}38955[38955] {pull}38994[38994] diff --git a/libbeat/reader/syslog/parser/rfc3164.rl b/libbeat/reader/syslog/parser/rfc3164.rl index 0dac77314040..709c049eb06e 100644 --- a/libbeat/reader/syslog/parser/rfc3164.rl +++ b/libbeat/reader/syslog/parser/rfc3164.rl @@ -16,7 +16,7 @@ hostname = graph+ >tok %set_hostname; tag = (print -- [ :\[])+ >tok %set_tag; - content_value = print+ >tok %set_content; + content_value = digit+ >tok %set_content; content = '[' content_value ']'; msg = (tag content? ':' sp)? any+ >tok %set_msg; }%% diff --git a/libbeat/reader/syslog/rfc3164_gen.go b/libbeat/reader/syslog/rfc3164_gen.go index 852ec066f597..b4c4146ba82b 100644 --- a/libbeat/reader/syslog/rfc3164_gen.go +++ b/libbeat/reader/syslog/rfc3164_gen.go @@ -80,10 +80,6 @@ func parseRFC3164(data string, loc *time.Location) (message, error) { goto st_case_29 case 30: goto st_case_30 - case 31: - goto st_case_31 - case 32: - goto st_case_32 case 7: goto st_case_7 case 8: @@ -317,7 +313,7 @@ func parseRFC3164(data string, loc *time.Location) (message, error) { goto _test_eof28 } st_case_28: - if 32 <= data[p] && data[p] <= 126 { + if 48 <= data[p] && data[p] <= 57 { goto tr37 } goto st24 @@ -334,7 +330,7 @@ func parseRFC3164(data string, loc *time.Location) (message, error) { if data[p] == 93 { goto tr39 } - if 32 <= data[p] && data[p] <= 126 { + if 48 <= data[p] && data[p] <= 57 { goto st29 } goto st24 @@ -342,56 +338,16 @@ func parseRFC3164(data string, loc *time.Location) (message, error) { m.setContent(data[tok:p]) - goto st30 - tr42: - - m.setContent(data[tok:p]) - - tok = p - goto st30 st30: if p++; p == pe { goto _test_eof30 } st_case_30: - switch data[p] { - case 58: - goto st31 - case 93: - goto tr39 - } - if 32 <= data[p] && data[p] <= 126 { - goto st29 - } - goto st24 - st31: - if p++; p == pe { - goto _test_eof31 - } - st_case_31: - switch data[p] { - case 32: - goto st32 - case 93: - goto tr39 - } - if 33 <= data[p] && data[p] <= 126 { - goto st29 + if data[p] == 58 { + goto st26 } goto st24 - st32: - if p++; p == pe { - goto _test_eof32 - } - st_case_32: - if data[p] == 93 { - goto tr42 - } - if 32 <= data[p] && data[p] <= 126 { - goto tr37 - } - goto tr11 st7: if p++; p == pe { goto _test_eof7 @@ -816,12 +772,6 @@ func parseRFC3164(data string, loc *time.Location) (message, error) { _test_eof30: cs = 30 goto _test_eof - _test_eof31: - cs = 31 - goto _test_eof - _test_eof32: - cs = 32 - goto _test_eof _test_eof7: cs = 7 goto _test_eof @@ -879,7 +829,7 @@ func parseRFC3164(data string, loc *time.Location) (message, error) { } if p == eof { switch cs { - case 24, 25, 26, 27, 28, 29, 30, 31, 32: + case 24, 25, 26, 27, 28, 29, 30: m.setMsg(data[tok:p]) diff --git a/libbeat/reader/syslog/rfc3164_test.go b/libbeat/reader/syslog/rfc3164_test.go index d1c75fe574eb..a2c80e8c263f 100644 --- a/libbeat/reader/syslog/rfc3164_test.go +++ b/libbeat/reader/syslog/rfc3164_test.go @@ -88,6 +88,19 @@ func TestParseRFC3164(t *testing.T) { msg: "message", }, }, + "ok-procid-with-square-brackets-msg": { + in: "<114>Apr 12 13:30:01 aaaaaa001.adm.domain aaaaaa001[25259]: my.some.domain 10.11.12.13 - USERNAME [12/Apr/2024:13:29:59.993 +0200] /skodas \"GET /skodas/group/pod-documentation/aaa HTTP/1.1\" 301 301 290bytes 1 10327", + want: message{ + timestamp: mustParseTime(time.Stamp, "Apr 12 13:30:01", time.Local), + priority: 114, + facility: 14, + severity: 2, + hostname: "aaaaaa001.adm.domain", + process: "aaaaaa001", + pid: "25259", + msg: "my.some.domain 10.11.12.13 - USERNAME [12/Apr/2024:13:29:59.993 +0200] /skodas \"GET /skodas/group/pod-documentation/aaa HTTP/1.1\" 301 301 290bytes 1 10327", + }, + }, "err-pri-not-a-number": { in: "Oct 11 22:14:15 test-host this is the message", want: message{