From 03f2e0c41951ea092c311270e27c6e0f0303bffc Mon Sep 17 00:00:00 2001 From: Nicolas Ruflin Date: Tue, 9 May 2017 13:08:32 +0200 Subject: [PATCH] Remove document_type from Filebeat (#4204) The `_type` field was removed in elasticsearch 6.0. The initial intention of `document_type` was to define different `_type`. As this does not exist anymore the config option was removed. It is recommended to use `fields` instead to add specific fields to a prospector. * Adjust tests accordingly --- CHANGELOG.asciidoc | 1 + filebeat/_meta/common.full.p2.yml | 5 ----- filebeat/_meta/fields.common.yml | 5 ----- filebeat/docs/fields.asciidoc | 8 -------- filebeat/docs/migration.asciidoc | 13 +++---------- .../configuration/filebeat-options.asciidoc | 7 ------- filebeat/filebeat.full.yml | 5 ----- filebeat/harvester/config.go | 2 -- filebeat/harvester/log.go | 1 - filebeat/tests/system/test_json.py | 10 ++++------ filebeat/tests/system/test_processors.py | 8 ++++---- filebeat/tests/system/test_prospector.py | 4 ++-- libbeat/tests/system/beat/beat.py | 2 +- 13 files changed, 15 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 4b768bc955a..41b49ea66ec 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -96,6 +96,7 @@ https://github.com/elastic/beats/compare/v5.4.0...v6.0.0-alpha1[View commits] - Remove code to convert states from 1.x. {pull}3767[3767] - Remove deprecated config options force_close_files and close_older. {pull}3768[3768] - Change clean_removed behaviour to also remove states for files which cannot be found anymore under the same name. {pull}3827[3827] +- Remove `document_type` config option. Use `fields` instead. 
{pull}4204[4204] *Packetbeat* diff --git a/filebeat/_meta/common.full.p2.yml b/filebeat/_meta/common.full.p2.yml index 6fdaa8cd4a8..61d34414c9d 100644 --- a/filebeat/_meta/common.full.p2.yml +++ b/filebeat/_meta/common.full.p2.yml @@ -67,11 +67,6 @@ filebeat.prospectors: # Time strings like 2h (2 hours), 5m (5 minutes) can be used. #ignore_older: 0 - # Type to be published in the 'type' field. For Elasticsearch output, - # the type defines the document type these entries should be stored - # in. Default: log - #document_type: log - # How often the prospector checks for new files in the paths that are specified # for harvesting. Specify 1s to scan the directory as frequently as possible # without causing Filebeat to scan too frequently. Default: 10s. diff --git a/filebeat/_meta/fields.common.yml b/filebeat/_meta/fields.common.yml index d33ef10a6c7..0c515594660 100644 --- a/filebeat/_meta/fields.common.yml +++ b/filebeat/_meta/fields.common.yml @@ -23,11 +23,6 @@ description: > The content of the line read from the log file. - - name: type - required: true - description: > - The name of the log event. This field is set to the value specified for the `document_type` option in the prospector section of the Filebeat config file. - - name: input_type required: true description: > diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index bbce2ce1b1b..d8b6c07bf7d 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -767,14 +767,6 @@ required: True The content of the line read from the log file. -[float] -=== type - -required: True - -The name of the log event. This field is set to the value specified for the `document_type` option in the prospector section of the Filebeat config file. 
- - [float] === input_type diff --git a/filebeat/docs/migration.asciidoc b/filebeat/docs/migration.asciidoc index 8f4b18ba679..1f683758041 100644 --- a/filebeat/docs/migration.asciidoc +++ b/filebeat/docs/migration.asciidoc @@ -4,7 +4,7 @@ [partintro] -- Filebeat is based on the Logstash Forwarder source code and replaces Logstash Forwarder as the method -to use for tailing log files and forwarding them to Logstash. +to use for tailing log files and forwarding them to Logstash. Filebeat introduces the following major changes: @@ -139,25 +139,20 @@ filebeat.prospectors: paths: - /var/log/messages - /var/log/*.log - document_type: syslog <1> fields: service: apache zone: us-east-1 fields_under_root: true - input_type: stdin <2> - document_type: stdin - input_type: log paths: - /var/log/apache2/httpd-*.log - document_type: apache ------------------------------------------------------------------------------------- -<1> The `document_type` option controls the output `type` field, which is used by the - Elasticsearch output to determine the document type. -<2> The explicit `input_type` option was introduced to differentiate between normal files and +<1> The explicit `input_type` option was introduced to differentiate between normal files and stdin. In the future, additional types might be supported. -As you can see, apart from the new `document_type` and `input_type` options, +As you can see, apart from the new `input_type` options, which were before implicitly defined via the `type` custom field, the remaining options can be migrated mechanically. 
@@ -287,7 +282,6 @@ filebeat.prospectors: - input_type: log paths: - /var/log/*.log - document_type: syslog fields: service: test01 output.elasticsearch: @@ -375,7 +369,6 @@ filebeat.prospectors: - input_type: log paths: - /var/log/*.log - document_type: syslog fields: service: test01 fields_under_root: true diff --git a/filebeat/docs/reference/configuration/filebeat-options.asciidoc b/filebeat/docs/reference/configuration/filebeat-options.asciidoc index e0e095a9aba..83136af967e 100644 --- a/filebeat/docs/reference/configuration/filebeat-options.asciidoc +++ b/filebeat/docs/reference/configuration/filebeat-options.asciidoc @@ -14,7 +14,6 @@ filebeat.prospectors: - input_type: log paths: - /var/log/apache/httpd-*.log - document_type: apache - input_type: log paths: @@ -303,12 +302,6 @@ If you require log lines to be sent in near real time do not use a very low `sca The default setting is 10s. [[filebeat-document-type]] -===== document_type - -The event type to use for published lines read by harvesters. For Elasticsearch -output, the value that you specify here is used to set the `type` field in the output -document. The default value is `log`. - ===== harvester_buffer_size The size in bytes of the buffer that each harvester uses when fetching a file. The default is 16384. diff --git a/filebeat/filebeat.full.yml b/filebeat/filebeat.full.yml index 14105fa4813..5b107cacfd1 100644 --- a/filebeat/filebeat.full.yml +++ b/filebeat/filebeat.full.yml @@ -236,11 +236,6 @@ filebeat.prospectors: # Time strings like 2h (2 hours), 5m (5 minutes) can be used. #ignore_older: 0 - # Type to be published in the 'type' field. For Elasticsearch output, - # the type defines the document type these entries should be stored - # in. Default: log - #document_type: log - # How often the prospector checks for new files in the paths that are specified # for harvesting. Specify 1s to scan the directory as frequently as possible # without causing Filebeat to scan too frequently. Default: 10s. 
diff --git a/filebeat/harvester/config.go b/filebeat/harvester/config.go index 05b653f403a..f9653380263 100644 --- a/filebeat/harvester/config.go +++ b/filebeat/harvester/config.go @@ -26,7 +26,6 @@ var ( CloseRenamed: false, CloseEOF: false, CloseTimeout: 0, - DocumentType: "log", CleanInactive: 0, } ) @@ -49,7 +48,6 @@ type harvesterConfig struct { MaxBytes int `config:"max_bytes" validate:"min=0,nonzero"` Multiline *reader.MultilineConfig `config:"multiline"` JSON *reader.JSONConfig `config:"json"` - DocumentType string `config:"document_type"` CleanInactive time.Duration `config:"clean_inactive" validate:"min=0"` Pipeline string `config:"pipeline"` Module string `config:"_module_name"` // hidden option to set the module name diff --git a/filebeat/harvester/log.go b/filebeat/harvester/log.go index b7fee62952d..1aabc3ab2b6 100644 --- a/filebeat/harvester/log.go +++ b/filebeat/harvester/log.go @@ -150,7 +150,6 @@ func (h *Harvester) Harvest(r reader.Reader) { "@timestamp": common.Time(message.Ts), "source": state.Source, "offset": state.Offset, // Offset here is the offset before the starting char. 
- "type": h.config.DocumentType, "input_type": h.config.InputType, } data.Event.DeepUpdate(message.Fields) diff --git a/filebeat/tests/system/test_json.py b/filebeat/tests/system/test_json.py index eb70cf61a02..297055e3f00 100644 --- a/filebeat/tests/system/test_json.py +++ b/filebeat/tests/system/test_json.py @@ -196,7 +196,6 @@ def test_timestamp_in_message(self): output = self.read_output() assert len(output) == 5 assert all(isinstance(o["@timestamp"], basestring) for o in output) - assert all(isinstance(o["type"], basestring) for o in output) assert output[0]["@timestamp"] == "2016-04-05T18:47:18.444Z" assert output[1]["@timestamp"] != "invalid" @@ -239,14 +238,13 @@ def test_type_in_message(self): output = self.read_output() assert len(output) == 3 assert all(isinstance(o["@timestamp"], basestring) for o in output) - assert all(isinstance(o["type"], basestring) for o in output) assert output[0]["type"] == "test" - assert output[1]["type"] == "log" + assert "type" not in output[1] assert output[1]["json_error"] == \ "type not overwritten (not string)" - assert output[2]["type"] == "log" + assert "type" not in output[2] assert output[2]["json_error"] == \ "type not overwritten (not string)" @@ -283,7 +281,7 @@ def test_with_generic_filtering(self): proc.check_kill_and_wait() output = self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], ) assert len(output) == 1 o = output[0] @@ -327,7 +325,7 @@ def test_with_generic_filtering_remove_headers(self): proc.check_kill_and_wait() output = self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], ) assert len(output) == 1 o = output[0] diff --git a/filebeat/tests/system/test_processors.py b/filebeat/tests/system/test_processors.py index 47fb701f09c..e5c3a82a031 100644 --- a/filebeat/tests/system/test_processors.py +++ b/filebeat/tests/system/test_processors.py @@ -28,7 +28,7 @@ def test_dropfields(self): filebeat.check_kill_and_wait() output = 
self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], )[0] assert "beat.name" not in output assert "message" in output @@ -53,7 +53,7 @@ def test_include_fields(self): filebeat.check_kill_and_wait() output = self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], )[0] assert "beat.name" not in output assert "message" in output @@ -81,7 +81,7 @@ def test_drop_event(self): filebeat.check_kill_and_wait() output = self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], )[0] assert "beat.name" in output assert "message" in output @@ -110,7 +110,7 @@ def test_condition(self): filebeat.check_kill_and_wait() output = self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], )[0] assert "beat.name" in output assert "message" in output diff --git a/filebeat/tests/system/test_prospector.py b/filebeat/tests/system/test_prospector.py index 1b746af0a6c..bb84d67b5f5 100644 --- a/filebeat/tests/system/test_prospector.py +++ b/filebeat/tests/system/test_prospector.py @@ -648,7 +648,7 @@ def test_prospector_filter_dropfields(self): filebeat.check_kill_and_wait() output = self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], )[0] assert "offset" not in output assert "message" in output @@ -673,7 +673,7 @@ def test_prospector_filter_includefields(self): filebeat.check_kill_and_wait() output = self.read_output( - required_fields=["@timestamp", "type"], + required_fields=["@timestamp"], )[0] assert "message" not in output assert "offset" in output diff --git a/libbeat/tests/system/beat/beat.py b/libbeat/tests/system/beat/beat.py index e8946f2c7f2..b2af1000dcd 100644 --- a/libbeat/tests/system/beat/beat.py +++ b/libbeat/tests/system/beat/beat.py @@ -11,7 +11,7 @@ import yaml from datetime import datetime, timedelta -BEAT_REQUIRED_FIELDS = ["@timestamp", "type", +BEAT_REQUIRED_FIELDS = ["@timestamp", 
"beat.name", "beat.hostname", "beat.version"] INTEGRATION_TESTS = os.environ.get('INTEGRATION_TESTS', False)