From 9011ed7d4c43ecf6ce2fe44ca55094ab24bb9043 Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Tue, 30 May 2017 00:14:44 +0200 Subject: [PATCH] Nginx module: use first not private IP address as remote_ip A common customization to the nginx logs is to add the contents of the X-Forwarded-For header in front of the remote IPs. This typically results in a list of remote IPs. This adds a new field `remote_ip_list` which is an array, and uses a Painless script to automatically select the first non-private IP for the `remote_ip` field, which is the field on which GeoIP is applied. Fixes #4322. --- filebeat/docs/fields.asciidoc | 42 +- .../module/apache2/access/_meta/fields.yml | 8 + filebeat/module/nginx/access/_meta/fields.yml | 17 +- .../module/nginx/access/ingest/default.json | 17 +- filebeat/module/nginx/access/test/test.log | 4 + .../nginx/access/test/test.log-expected.json | 445 +++++++++++++----- libbeat/template/field.go | 4 +- libbeat/template/fields.go | 4 +- 8 files changed, 428 insertions(+), 113 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index bbe7e419a646..8eadbbf9cb10 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -233,6 +233,22 @@ type: geo_point The longitude and latitude. +[float] +=== apache2.access.geoip.region_name + +type: keyword + +The region name. + + +[float] +=== apache2.access.geoip.city_name + +type: keyword + +The city name. + + [float] == error Fields @@ -953,12 +969,20 @@ Contains fields for the Nginx access logs. +[float] +=== nginx.access.remote_ip_list + +type: array + +An array of remote IP addresses. It is a list because it is common to include, besides the client IP address, IP addresses from headers like `X-Forwarded-For`. See also the `remote_ip` field. + + [float] === nginx.access.remote_ip type: keyword -Client IP address. +Client IP address. The first public IP address from the `remote_ip_list` array. If no public IP addresses are present, this field contains the first private IP address from the `remote_ip_list` array. [float] @@ -1141,6 +1165,22 @@ type: geo_point The longitude and latitude. +[float] +=== nginx.access.geoip.region_name + +type: keyword + +The region name. + + +[float] +=== nginx.access.geoip.city_name + +type: keyword + +The city name. + + [float] == error Fields diff --git a/filebeat/module/apache2/access/_meta/fields.yml b/filebeat/module/apache2/access/_meta/fields.yml index 97fabdc5cab3..be09717198ce 100644 --- a/filebeat/module/apache2/access/_meta/fields.yml +++ b/filebeat/module/apache2/access/_meta/fields.yml @@ -104,4 +104,12 @@ type: geo_point description: > The longitude and latitude. + - name: region_name + type: keyword + description: > + The region name. + - name: city_name + type: keyword + description: > + The city name. diff --git a/filebeat/module/nginx/access/_meta/fields.yml b/filebeat/module/nginx/access/_meta/fields.yml index 0b5f1eb275ed..38e89be9ddb2 100644 --- a/filebeat/module/nginx/access/_meta/fields.yml +++ b/filebeat/module/nginx/access/_meta/fields.yml @@ -3,10 +3,17 @@ description: > Contains fields for the Nginx access logs. fields: + - name: remote_ip_list + type: array + description: > + An array of remote IP addresses. It is a list because it is common to include, besides the client + IP address, IP addresses from headers like `X-Forwarded-For`. See also the `remote_ip` field. - name: remote_ip type: keyword description: > - Client IP address. + Client IP address. The first public IP address from the `remote_ip_list` array. If no public IP + addresses are present, this field contains the first private IP address from the `remote_ip_list` + array. - name: user_name type: keyword description: > @@ -104,4 +111,12 @@ type: geo_point description: > The longitude and latitude. + - name: region_name + type: keyword + description: > + The region name. + - name: city_name + type: keyword + description: > + The city name. diff --git a/filebeat/module/nginx/access/ingest/default.json b/filebeat/module/nginx/access/ingest/default.json index 2e7a3d88ba08..ecb3df51b92b 100644 --- a/filebeat/module/nginx/access/ingest/default.json +++ b/filebeat/module/nginx/access/ingest/default.json @@ -4,11 +4,24 @@ "grok": { "field": "message", "patterns":[ - "%{IPORHOST:nginx.access.remote_ip}(,\\s%{IPORHOST})* - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{WORD:nginx.access.method} %{DATA:nginx.access.url} HTTP/%{NUMBER:nginx.access.http_version}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\"" + "\"?%{IP_LIST:nginx.access.remote_ip_list} - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{WORD:nginx.access.method} %{DATA:nginx.access.url} HTTP/%{NUMBER:nginx.access.http_version}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\"" ], + "pattern_definitions": { + "IP_LIST": "%{IP}(\"?,?\\s*%{IP})*" + }, "ignore_missing": true } - },{ + }, { + "split": { + "field": "nginx.access.remote_ip_list", + "separator": "\"?,?\\s+" + } + }, { + "script": { + "lang": "painless", + "inline": "boolean isPrivate(def ip) { try { StringTokenizer tok = new StringTokenizer(ip, '.'); int firstByte = Integer.parseInt(tok.nextToken()); int secondByte = Integer.parseInt(tok.nextToken()); if (firstByte == 10) { return true; } if (firstByte == 192 && secondByte == 168) { return true; } if (firstByte == 172 && secondByte >= 16 && secondByte <= 31) { return true; } if (firstByte == 127) { return true; } return false; } catch (Exception e) { return false; } } def found = false; for (def item : ctx.nginx.access.remote_ip_list) { if (!isPrivate(item)) { ctx.nginx.access.remote_ip = item; found = true; break; } } if (!found) { ctx.nginx.access.remote_ip = ctx.nginx.access.remote_ip_list[0]; }" + } + }, { "remove":{ "field": "message" } diff --git a/filebeat/module/nginx/access/test/test.log b/filebeat/module/nginx/access/test/test.log index 952c54c202f8..e303a6d516d7 100644 --- a/filebeat/module/nginx/access/test/test.log +++ b/filebeat/module/nginx/access/test/test.log @@ -1,2 +1,6 @@ 10.0.0.2, 10.0.0.1, 127.0.0.1 - - [07/Dec/2016:11:05:07 +0100] "GET /ocelot HTTP/1.1" 200 571 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0" 172.17.0.1 - - [29/May/2017:19:02:48 +0000] "GET /stringpatch HTTP/1.1" 404 612 "-" "Mozilla/5.0 (Windows NT 6.1; rv:15.0) Gecko/20120716 Firefox/15.0a2" "-" +10.0.0.2, 10.0.0.1, 85.181.35.98 - - [07/Dec/2016:11:05:07 +0100] "GET /ocelot HTTP/1.1" 200 571 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0" +85.181.35.98 - - [07/Dec/2016:11:05:07 +0100] "GET /ocelot HTTP/1.1" 200 571 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0" +"10.5.102.222, 199.96.1.1, 204.246.1.1" 10.2.1.185 - - [22/Jan/2016:13:18:29 +0000] "GET /assets/xxxx?q=100 HTTP/1.1" 200 25507 "-" "Amazon CloudFront" +2a03:0000:10ff:f00f:0000:0000:0:8000, 10.225.192.17 10.2.2.121 - - [30/Dec/2016:06:47:09 +0000] "GET /test.html HTTP/1.1" 404 8571 "-" "Mozilla/5.0 (compatible; Facebot 1.0; https://developers.facebook.com/docs/sharing/webmasters/crawler)" diff --git a/filebeat/module/nginx/access/test/test.log-expected.json b/filebeat/module/nginx/access/test/test.log-expected.json index afb3999cefd8..9b31bf3fa709 100644 --- a/filebeat/module/nginx/access/test/test.log-expected.json +++ b/filebeat/module/nginx/access/test/test.log-expected.json @@ -1,109 +1,344 @@ [ - { - "_index": "filebeat-2016.12.27", - "_type": "log", - "_id": "AVlBCaYsqYg9cc5KQfcT", - "_score": null, - "_source": { - "@timestamp": "2016-12-07T10:05:07.000Z", - "offset": 191, - "nginx": { - "access": { - "referrer": "-", - "response_code": "200", - "remote_ip": "10.0.0.2", - "method": "GET", - "user_name": "-", - "http_version": "1.1", - "body_sent": { - "bytes": "571" - }, - "url": "/ocelot", - "user_agent": { - "major": "49", - "minor": "0", - "os": "Mac OS X 10.12", - "os_minor": "12", - "os_major": "10", - "name": "Firefox", - "os_name": "Mac OS X", - "device": "Other" - } - } - }, - "beat": { - "hostname": "192-168-0-7.rdsnet.ro", - "name": "192-168-0-7.rdsnet.ro", - "version": "6.0.0-alpha1" - }, - "read_timestamp": "2016-12-27T15:52:23.304Z", - "source": "module/nginx/access/test/test.log", - "fields": { - "pipeline_id": "nginx-access-with_plugins", - "source_type": "nginx-access" - }, - "prospector": { - "type": "log" + { + "_index" : "filebeat-6.0.0-alpha2-2017.05.30", + "_type" : "doc", + "_id" : "AVxWUuZ8OMOtQBaTipsE", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2016-12-07T10:05:07.000Z", + "offset" : 527, + "nginx" : { + "access" : { + "referrer" : "-", + "response_code" : "200", + "remote_ip" : "85.181.35.98", + "geoip" : { + "continent_name" : "Europe", + "country_iso_code" : "DE", + "location" : { + "lon" : 9.0, + "lat" : 51.0 } - }, - "fields": { - "@timestamp": [ - 1481105107000 - ] - }, - "sort": [ - 1481105107000 - ] - }, - { - "_id": "AVxVuTJrsqw9BQCgtCgi", - "_index": "filebeat-6.0.0-alpha2-2017.05.29", - "_score": null, - "_source": { - "@timestamp": "2017-05-29T19:02:48.000Z", - "beat": { - "hostname": "X1", - "name": "X1", - "version": "6.0.0-alpha2" - }, - "nginx": { - "access": { - "body_sent": { - "bytes": "612" - }, - "http_version": "1.1", - "method": "GET", - "referrer": "-", - "remote_ip": "172.17.0.1", - "response_code": "404", - "url": "/stringpatch", - "user_agent": { - "device": "Other", - "major": "15", - "minor": "0", - "name": "Firefox Alpha", - "os": "Windows 7", - "os_name": "Windows 7", - "patch": "a2" - }, - "user_name": "-" - } - }, - "offset": 341, - "prospector": { - "type": "log" - }, - "read_timestamp": "2017-05-29T19:40:14.373Z", - "source": "/home/exekias/go/src/github.com/elastic/beats/filebeat/nginx.log" - }, - "_type": "doc", - "fields": { - "@timestamp": [ - 1496084568000 - ] - }, - "sort": [ - 1496084568000 - ] + }, + "method" : "GET", + "user_name" : "-", + "http_version" : "1.1", + "body_sent" : { + "bytes" : "571" + }, + "remote_ip_list" : [ + "10.0.0.2", + "10.0.0.1", + "85.181.35.98" + ], + "url" : "/ocelot", + "user_agent" : { + "major" : "49", + "minor" : "0", + "os" : "Mac OS X 10.12", + "os_minor" : "12", + "os_major" : "10", + "name" : "Firefox", + "os_name" : "Mac OS X", + "device" : "Other" + } + } + }, + "beat" : { + "hostname" : "a-mac-with-esc-key-2.local", + "name" : "a-mac-with-esc-key-2.local", + "version" : "6.0.0-alpha2" + }, + "prospector" : { + "type" : "log" + }, + "read_timestamp" : "2017-05-29T22:28:06.246Z", + "source" : "/Users/tsg/src/github.com/elastic/beats/filebeat/module/nginx/access/test/test.log", + "fileset" : { + "module" : "nginx", + "name" : "access" + } } + }, + { + "_index" : "filebeat-6.0.0-alpha2-2017.05.30", + "_type" : "doc", + "_id" : "AVxWUuZ8OMOtQBaTipsD", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2017-05-29T19:02:48.000Z", + "offset" : 341, + "nginx" : { + "access" : { + "referrer" : "-", + "response_code" : "404", + "remote_ip" : "172.17.0.1", + "method" : "GET", + "user_name" : "-", + "http_version" : "1.1", + "body_sent" : { + "bytes" : "612" + }, + "remote_ip_list" : [ + "172.17.0.1" + ], + "url" : "/stringpatch", + "user_agent" : { + "patch" : "a2", + "major" : "15", + "minor" : "0", + "os" : "Windows 7", + "name" : "Firefox Alpha", + "os_name" : "Windows 7", + "device" : "Other" + } + } + }, + "beat" : { + "hostname" : "a-mac-with-esc-key-2.local", + "name" : "a-mac-with-esc-key-2.local", + "version" : "6.0.0-alpha2" + }, + "prospector" : { + "type" : "log" + }, + "read_timestamp" : "2017-05-29T22:28:06.246Z", + "source" : "/Users/tsg/src/github.com/elastic/beats/filebeat/module/nginx/access/test/test.log", + "fileset" : { + "module" : "nginx", + "name" : "access" + } + } + }, + { + "_index" : "filebeat-6.0.0-alpha2-2017.05.30", + "_type" : "doc", + "_id" : "AVxWUuZ8OMOtQBaTipsF", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2016-12-07T10:05:07.000Z", + "offset" : 693, + "nginx" : { + "access" : { + "referrer" : "-", + "response_code" : "200", + "remote_ip" : "85.181.35.98", + "geoip" : { + "continent_name" : "Europe", + "country_iso_code" : "DE", + "location" : { + "lon" : 9.0, + "lat" : 51.0 + } + }, + "method" : "GET", + "user_name" : "-", + "http_version" : "1.1", + "body_sent" : { + "bytes" : "571" + }, + "remote_ip_list" : [ + "85.181.35.98" + ], + "url" : "/ocelot", + "user_agent" : { + "major" : "49", + "minor" : "0", + "os" : "Mac OS X 10.12", + "os_minor" : "12", + "os_major" : "10", + "name" : "Firefox", + "os_name" : "Mac OS X", + "device" : "Other" + } + } + }, + "beat" : { + "hostname" : "a-mac-with-esc-key-2.local", + "name" : "a-mac-with-esc-key-2.local", + "version" : "6.0.0-alpha2" + }, + "prospector" : { + "type" : "log" + }, + "read_timestamp" : "2017-05-29T22:28:06.246Z", + "source" : "/Users/tsg/src/github.com/elastic/beats/filebeat/module/nginx/access/test/test.log", + "fileset" : { + "module" : "nginx", + "name" : "access" + } + } + }, + { + "_index" : "filebeat-6.0.0-alpha2-2017.05.30", + "_type" : "doc", + "_id" : "AVxWUuZ8OMOtQBaTipsC", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2016-12-07T10:05:07.000Z", + "offset" : 183, + "nginx" : { + "access" : { + "referrer" : "-", + "response_code" : "200", + "remote_ip" : "10.0.0.2", + "method" : "GET", + "user_name" : "-", + "http_version" : "1.1", + "body_sent" : { + "bytes" : "571" + }, + "remote_ip_list" : [ + "10.0.0.2", + "10.0.0.1", + "127.0.0.1" + ], + "url" : "/ocelot", + "user_agent" : { + "major" : "49", + "minor" : "0", + "os" : "Mac OS X 10.12", + "os_minor" : "12", + "os_major" : "10", + "name" : "Firefox", + "os_name" : "Mac OS X", + "device" : "Other" + } + } + }, + "beat" : { + "hostname" : "a-mac-with-esc-key-2.local", + "name" : "a-mac-with-esc-key-2.local", + "version" : "6.0.0-alpha2" + }, + "prospector" : { + "type" : "log" + }, + "read_timestamp" : "2017-05-29T22:28:06.245Z", + "source" : "/Users/tsg/src/github.com/elastic/beats/filebeat/module/nginx/access/test/test.log", + "fileset" : { + "module" : "nginx", + "name" : "access" + } + } + }, + { + "_index" : "filebeat-6.0.0-alpha2-2017.05.30", + "_type" : "doc", + "_id" : "AVxWUuZ8OMOtQBaTipsG", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2016-01-22T13:18:29.000Z", + "offset" : 845, + "nginx" : { + "access" : { + "referrer" : "-", + "response_code" : "200", + "remote_ip" : "199.96.1.1", + "geoip" : { + "continent_name" : "North America", + "city_name" : "Springfield", + "country_iso_code" : "US", + "region_name" : "Illinois", + "location" : { + "lon" : -89.6859, + "lat" : 39.772 + } + }, + "method" : "GET", + "user_name" : "-", + "http_version" : "1.1", + "body_sent" : { + "bytes" : "25507" + }, + "remote_ip_list" : [ + "10.5.102.222", + "199.96.1.1", + "204.246.1.1", + "10.2.1.185" + ], + "url" : "/assets/xxxx?q=100", + "user_agent" : { + "os" : "Other", + "name" : "Other", + "os_name" : "Other", + "device" : "Other" + } + } + }, + "beat" : { + "hostname" : "a-mac-with-esc-key-2.local", + "name" : "a-mac-with-esc-key-2.local", + "version" : "6.0.0-alpha2" + }, + "prospector" : { + "type" : "log" + }, + "read_timestamp" : "2017-05-29T22:28:06.246Z", + "source" : "/Users/tsg/src/github.com/elastic/beats/filebeat/module/nginx/access/test/test.log", + "fileset" : { + "module" : "nginx", + "name" : "access" + } + } + }, + { + "_index" : "filebeat-6.0.0-alpha2-2017.05.30", + "_type" : "doc", + "_id" : "AVxWUuZ8OMOtQBaTipsH", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2016-12-30T06:47:09.000Z", + "offset" : 1085, + "nginx" : { + "access" : { + "referrer" : "-", + "response_code" : "404", + "remote_ip" : "2a03:0000:10ff:f00f:0000:0000:0:8000", + "geoip" : { + "continent_name" : "Europe", + "country_iso_code" : "PT", + "location" : { + "lon" : -8.13057, + "lat" : 39.6945 + } + }, + "method" : "GET", + "user_name" : "-", + "http_version" : "1.1", + "body_sent" : { + "bytes" : "8571" + }, + "remote_ip_list" : [ + "2a03:0000:10ff:f00f:0000:0000:0:8000", + "10.225.192.17", + "10.2.2.121" + ], + "url" : "/test.html", + "user_agent" : { + "major" : "1", + "minor" : "0", + "os" : "Other", + "name" : "Facebot", + "os_name" : "Other", + "device" : "Spider" + } + } + }, + "beat" : { + "hostname" : "a-mac-with-esc-key-2.local", + "name" : "a-mac-with-esc-key-2.local", + "version" : "6.0.0-alpha2" + }, + "prospector" : { + "type" : "log" + }, + "read_timestamp" : "2017-05-29T22:28:06.246Z", + "source" : "/Users/tsg/src/github.com/elastic/beats/filebeat/module/nginx/access/test/test.log", + "fileset" : { + "module" : "nginx", + "name" : "access" + } + } + } ] diff --git a/libbeat/template/field.go b/libbeat/template/field.go index 7d7e74955e68..64b08f1a642d 100644 --- a/libbeat/template/field.go +++ b/libbeat/template/field.go @@ -109,9 +109,7 @@ func (f *Field) text() common.MapStr { } func (f *Field) array() common.MapStr { - return common.MapStr{ - "properties": common.MapStr{}, - } + return common.MapStr{} } func (f *Field) object() common.MapStr { diff --git a/libbeat/template/fields.go b/libbeat/template/fields.go index 1de5e2a362d5..024954dbf277 100644 --- a/libbeat/template/fields.go +++ b/libbeat/template/fields.go @@ -53,7 +53,9 @@ func (f Fields) process(path string, esVersion Version) common.MapStr { mapping = field.other() } - output.Put(generateKey(field.Name), mapping) + if len(mapping) > 0 { + output.Put(generateKey(field.Name), mapping) + } } return output