From 8ad75cc08aab72668339be694bc77266dfbe76e6 Mon Sep 17 00:00:00 2001 From: Alexander Reelsen Date: Thu, 19 Dec 2019 16:57:44 +0100 Subject: [PATCH 1/2] Sync grok patterns with logstash patterns In order to ensure that logstash and Elasticsearch are able to understand the same patterns, this commit adds a few patterns and changes a few. --- libs/grok/src/main/java/org/elasticsearch/grok/Grok.java | 6 +++--- libs/grok/src/main/resources/patterns/aws | 2 ++ libs/grok/src/main/resources/patterns/bind | 3 +++ libs/grok/src/main/resources/patterns/firewalls | 8 +++++++- libs/grok/src/main/resources/patterns/grok-patterns | 2 ++ libs/grok/src/main/resources/patterns/java | 4 ++-- libs/grok/src/main/resources/patterns/linux-syslog | 2 +- libs/grok/src/main/resources/patterns/maven | 1 + libs/grok/src/main/resources/patterns/redis | 2 +- libs/grok/src/main/resources/patterns/squid | 4 ++++ 10 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 libs/grok/src/main/resources/patterns/bind create mode 100644 libs/grok/src/main/resources/patterns/maven create mode 100644 libs/grok/src/main/resources/patterns/squid diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java index f5f5a482bf493..5d5dc5d56f504 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java @@ -277,9 +277,9 @@ public static Map getBuiltinPatterns() { private static Map loadBuiltinPatterns() throws IOException { // Code for loading built-in grok patterns packaged with the jar file: String[] PATTERN_NAMES = new String[] { - "aws", "bacula", "bro", "exim", "firewalls", "grok-patterns", "haproxy", - "java", "junos", "linux-syslog", "mcollective-patterns", "mongodb", "nagios", - "postgresql", "rails", "redis", "ruby" + "aws", "bacula", "bind", "bro", "exim", "firewalls", "grok-patterns", "haproxy", + "java", "junos", "linux-syslog", "maven", 
"mcollective-patterns", "mongodb", "nagios", + "postgresql", "rails", "redis", "ruby", "squid" }; Map builtinPatterns = new HashMap<>(); for (String pattern : PATTERN_NAMES) { diff --git a/libs/grok/src/main/resources/patterns/aws b/libs/grok/src/main/resources/patterns/aws index 71edbc9f2966d..2c6a2ca6d4a59 100644 --- a/libs/grok/src/main/resources/patterns/aws +++ b/libs/grok/src/main/resources/patterns/aws @@ -9,3 +9,5 @@ ELB_URI %{URIPROTO:proto}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST:urihost})?(?:%{ ELB_REQUEST_LINE (?:%{WORD:verb} %{ELB_URI:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest}) ELB_ACCESS_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:elb} %{IP:clientip}:%{INT:clientport:int} (?:(%{IP:backendip}:?:%{INT:backendport:int})|-) %{NUMBER:request_processing_time:float} %{NUMBER:backend_processing_time:float} %{NUMBER:response_processing_time:float} %{INT:response:int} %{INT:backend_response:int} %{INT:received_bytes:int} %{INT:bytes:int} "%{ELB_REQUEST_LINE}" + +CLOUDFRONT_ACCESS_LOG (?%{YEAR}-%{MONTHNUM}-%{MONTHDAY}\t%{TIME})\t%{WORD:x_edge_location}\t(?:%{NUMBER:sc_bytes:int}|-)\t%{IPORHOST:clientip}\t%{WORD:cs_method}\t%{HOSTNAME:cs_host}\t%{NOTSPACE:cs_uri_stem}\t%{NUMBER:sc_status:int}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:agent}\t%{GREEDYDATA:cs_uri_query}\t%{GREEDYDATA:cookies}\t%{WORD:x_edge_result_type}\t%{NOTSPACE:x_edge_request_id}\t%{HOSTNAME:x_host_header}\t%{URIPROTO:cs_protocol}\t%{INT:cs_bytes:int}\t%{GREEDYDATA:time_taken:float}\t%{GREEDYDATA:x_forwarded_for}\t%{GREEDYDATA:ssl_protocol}\t%{GREEDYDATA:ssl_cipher}\t%{GREEDYDATA:x_edge_response_result_type} diff --git a/libs/grok/src/main/resources/patterns/bind b/libs/grok/src/main/resources/patterns/bind new file mode 100644 index 0000000000000..31e4414a350b6 --- /dev/null +++ b/libs/grok/src/main/resources/patterns/bind @@ -0,0 +1,3 @@ +BIND9_TIMESTAMP %{MONTHDAY}[-]%{MONTH}[-]%{YEAR} %{TIME} + +BIND9 %{BIND9_TIMESTAMP:timestamp} queries: %{LOGLEVEL:loglevel}: client 
%{IP:clientip}#%{POSINT:clientport} \(%{GREEDYDATA:query}\): query: %{GREEDYDATA:query} IN %{GREEDYDATA:querytype} \(%{IP:dns}\) diff --git a/libs/grok/src/main/resources/patterns/firewalls b/libs/grok/src/main/resources/patterns/firewalls index 03c3e5aff0cf7..0a0cbf67a6722 100644 --- a/libs/grok/src/main/resources/patterns/firewalls +++ b/libs/grok/src/main/resources/patterns/firewalls @@ -36,7 +36,7 @@ CISCOFW106006_106007_106010 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} # ASA-3-106014 CISCOFW106014 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} %{WORD:protocol} src %{DATA:src_interface}:%{IP:src_ip}(\(%{DATA:src_fwuser}\))? dst %{DATA:dst_interface}:%{IP:dst_ip}(\(%{DATA:dst_fwuser}\))? \(type %{INT:icmp_type}, code %{INT:icmp_code}\) # ASA-6-106015 -CISCOFW106015 %{CISCO_ACTION:action} %{WORD:protocol} \(%{DATA:policy_id}\) from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{DATA:tcp_flags} on interface %{GREEDYDATA:interface} +CISCOFW106015 %{CISCO_ACTION:action} %{WORD:protocol} \(%{DATA:policy_id}\) from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{DATA:tcp_flags} on interface %{GREEDYDATA:interface} # ASA-1-106021 CISCOFW106021 %{CISCO_ACTION:action} %{WORD:protocol} reverse path check from %{IP:src_ip} to %{IP:dst_ip} on interface %{GREEDYDATA:interface} # ASA-4-106023 @@ -45,6 +45,8 @@ CISCOFW106023 %{CISCO_ACTION:action}( protocol)? %{WORD:protocol} src %{DATA:src CISCOFW106100_2_3 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} for user '%{DATA:src_fwuser}' %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\) -> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\) hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] # ASA-5-106100 CISCOFW106100 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\)(\(%{DATA:src_fwuser}\))? 
-> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\)(\(%{DATA:src_fwuser}\))? hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] +# ASA-5-304001 +CISCOFW304001 %{IP:src_ip}(\(%{DATA:src_fwuser}\))? Accessed URL %{IP:dst_ip}:%{GREEDYDATA:dst_url} # ASA-6-110002 CISCOFW110002 %{CISCO_REASON:reason} for %{WORD:protocol} from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} # ASA-6-302010 @@ -84,3 +86,7 @@ CISCOFW733100 \[\s*%{DATA:drop_type}\s*\] drop %{DATA:drop_rate_id} exceeded. Cu # Shorewall firewall logs SHOREWALL (%{SYSLOGTIMESTAMP:timestamp}) (%{WORD:nf_host}) kernel:.*Shorewall:(%{WORD:nf_action1})?:(%{WORD:nf_action2})?.*IN=(%{USERNAME:nf_in_interface})?.*(OUT= *MAC=(%{COMMONMAC:nf_dst_mac}):(%{COMMONMAC:nf_src_mac})?|OUT=%{USERNAME:nf_out_interface}).*SRC=(%{IPV4:nf_src_ip}).*DST=(%{IPV4:nf_dst_ip}).*LEN=(%{WORD:nf_len}).?*TOS=(%{WORD:nf_tos}).?*PREC=(%{WORD:nf_prec}).?*TTL=(%{INT:nf_ttl}).?*ID=(%{INT:nf_id}).?*PROTO=(%{WORD:nf_protocol}).?*SPT=(%{INT:nf_src_port}?.*DPT=%{INT:nf_dst_port}?.*) #== End Shorewall + +#== SuSE Firewall 2 == +SFW2 ((%{SYSLOGTIMESTAMP})|(%{TIMESTAMP_ISO8601}))\s*%{HOSTNAME}\s*kernel\S+\s*%{NAGIOSTIME}\s*SFW2\-INext\-%{NOTSPACE:nf_action}\s*IN=%{USERNAME:nf_in_interface}.*OUT=((\s*%{USERNAME:nf_out_interface})|(\s*))MAC=((%{COMMONMAC:nf_dst_mac}:%{COMMONMAC:nf_src_mac})|(\s*)).*SRC=%{IP:nf_src_ip}\s*DST=%{IP:nf_dst_ip}.*PROTO=%{WORD:nf_protocol}((.*SPT=%{INT:nf_src_port}.*DPT=%{INT:nf_dst_port}.*)|()) +#== End SuSE == diff --git a/libs/grok/src/main/resources/patterns/grok-patterns b/libs/grok/src/main/resources/patterns/grok-patterns index 27bf6732790d6..d7e0afba91dee 100644 --- a/libs/grok/src/main/resources/patterns/grok-patterns +++ b/libs/grok/src/main/resources/patterns/grok-patterns @@ -18,6 +18,8 @@ DATA .*? 
GREEDYDATA .* QUOTEDSTRING (?>(?"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} +# URN, allowing use of RFC 2141 section 2.3 reserved characters +URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+ # Networking MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) diff --git a/libs/grok/src/main/resources/patterns/java b/libs/grok/src/main/resources/patterns/java index 01766b8ebd165..aedd615432fb5 100644 --- a/libs/grok/src/main/resources/patterns/java +++ b/libs/grok/src/main/resources/patterns/java @@ -1,8 +1,8 @@ JAVACLASS (?:[a-zA-Z$_][a-zA-Z$_0-9]*\.)*[a-zA-Z$_][a-zA-Z$_0-9]* #Space is an allowed character to match special cases like 'Native Method' or 'Unknown Source' JAVAFILE (?:[A-Za-z0-9_. -]+) -#Allow special method -JAVAMETHOD (?:()|[a-zA-Z$_][a-zA-Z$_0-9]*) +#Allow special <init>, <clinit> methods +JAVAMETHOD (?:(<(?:cl)?init>)|[a-zA-Z$_][a-zA-Z$_0-9]*) #Line number is optional in special cases 'Native method' or 'Unknown source' JAVASTACKTRACEPART %{SPACE}at %{JAVACLASS:class}\.%{JAVAMETHOD:method}\(%{JAVAFILE:file}(?::%{NUMBER:line})?\) # Java Logs diff --git a/libs/grok/src/main/resources/patterns/linux-syslog b/libs/grok/src/main/resources/patterns/linux-syslog index dcffb41ba8fef..a03a6536f9dee 100644 --- a/libs/grok/src/main/resources/patterns/linux-syslog +++ b/libs/grok/src/main/resources/patterns/linux-syslog @@ -11,6 +11,6 @@ SYSLOGLINE %{SYSLOGBASE2} %{GREEDYDATA:message} # IETF 5424 syslog(8) format (see http://www.rfc-editor.org/info/rfc5424) SYSLOG5424PRI <%{NONNEGINT:syslog5424_pri}> SYSLOG5424SD \[%{DATA}\]+ -SYSLOG5424BASE %{SYSLOG5424PRI}%{NONNEGINT:syslog5424_ver} +(?:%{TIMESTAMP_ISO8601:syslog5424_ts}|-) +(?:%{HOSTNAME:syslog5424_host}|-) +(-|%{SYSLOG5424PRINTASCII:syslog5424_app}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_proc}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_msgid}) +(?:%{SYSLOG5424SD:syslog5424_sd}|-|) +SYSLOG5424BASE 
%{SYSLOG5424PRI}%{NONNEGINT:syslog5424_ver} +(?:%{TIMESTAMP_ISO8601:syslog5424_ts}|-) +(?:%{IPORHOST:syslog5424_host}|-) +(-|%{SYSLOG5424PRINTASCII:syslog5424_app}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_proc}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_msgid}) +(?:%{SYSLOG5424SD:syslog5424_sd}|-|) SYSLOG5424LINE %{SYSLOG5424BASE} +%{GREEDYDATA:syslog5424_msg} diff --git a/libs/grok/src/main/resources/patterns/maven b/libs/grok/src/main/resources/patterns/maven new file mode 100644 index 0000000000000..f1dc808871026 --- /dev/null +++ b/libs/grok/src/main/resources/patterns/maven @@ -0,0 +1 @@ +MAVEN_VERSION (?:(\d+)\.)?(?:(\d+)\.)?(\*|\d+)(?:[.-](RELEASE|SNAPSHOT))? diff --git a/libs/grok/src/main/resources/patterns/redis b/libs/grok/src/main/resources/patterns/redis index 8655c4f043e69..341a330665dc6 100644 --- a/libs/grok/src/main/resources/patterns/redis +++ b/libs/grok/src/main/resources/patterns/redis @@ -1,3 +1,3 @@ REDISTIMESTAMP %{MONTHDAY} %{MONTH} %{TIME} REDISLOG \[%{POSINT:pid}\] %{REDISTIMESTAMP:timestamp} \* - +REDISMONLOG %{NUMBER:timestamp} \[%{INT:database} %{IP:client}:%{NUMBER:port}\] "%{WORD:command}"\s?%{GREEDYDATA:params} diff --git a/libs/grok/src/main/resources/patterns/squid b/libs/grok/src/main/resources/patterns/squid new file mode 100644 index 0000000000000..238fff1fda2ff --- /dev/null +++ b/libs/grok/src/main/resources/patterns/squid @@ -0,0 +1,4 @@ +# Pattern squid3 +# Documentation of squid3 logs formats can be found at the following link: +# http://wiki.squid-cache.org/Features/LogFormat +SQUID3 %{NUMBER:timestamp}\s+%{NUMBER:duration}\s%{IP:client_address}\s%{WORD:cache_result}/%{POSINT:status_code}\s%{NUMBER:bytes}\s%{WORD:request_method}\s%{NOTSPACE:url}\s(%{NOTSPACE:user}|-)\s%{WORD:hierarchy_code}/%{IPORHOST:server}\s%{NOTSPACE:content_type} From 4c53e3670ca51c8d475f59077acdda56fe3c09e9 Mon Sep 17 00:00:00 2001 From: Alexander Reelsen Date: Fri, 20 Dec 2019 11:01:20 +0100 Subject: [PATCH 2/2] fix total grok count test --- 
.../src/test/resources/rest-api-spec/test/ingest/120_grok.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/120_grok.yml b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/120_grok.yml index fa2280e0a11d1..c0aec0e3d7392 100644 --- a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/120_grok.yml +++ b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/120_grok.yml @@ -152,5 +152,5 @@ teardown: "Test Grok Patterns Retrieval": - do: ingest.processor_grok: {} - - length: { patterns: 303 } + - length: { patterns: 312 } - match: { patterns.PATH: "(?:%{UNIXPATH}|%{WINPATH})" }