From 30331bcc4c6e3b886eb8b4b393d27d5bfa075c5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?=
Date: Thu, 22 Apr 2021 10:15:09 +0200
Subject: [PATCH] Add support for parsers in filestream input (#24763)

This PR adds support for parsers in the `filestream` input. Example
configuration that aggregates five lines into a single event and parses
the JSON contents:

```yaml
- type: filestream
  enabled: true
  paths:
    - test.log
  parsers:
    - multiline:
        type: count
        count_lines: 5
        skip_newline: true
    - ndjson:
        keys_under_root: true
```
---
 filebeat/input/filestream/config.go           |  28 +-
 filebeat/input/filestream/environment_test.go |  33 ++
 .../example_inputs_integration_test.go        |  89 +++
 filebeat/input/filestream/input.go            |  26 +-
 filebeat/input/filestream/parser.go           | 140 +++++
 filebeat/input/filestream/parser_test.go      | 284 +++++++++
 .../filestream/parsers_integration_test.go    | 552 ++++++++++++++++++
 libbeat/reader/message.go                     |   1 +
 libbeat/reader/multiline/multiline_config.go  |   2 +
 libbeat/reader/readjson/json.go               |  71 +++
 10 files changed, 1200 insertions(+), 26 deletions(-)
 create mode 100644 filebeat/input/filestream/example_inputs_integration_test.go
 create mode 100644 filebeat/input/filestream/parser.go
 create mode 100644 filebeat/input/filestream/parser_test.go
 create mode 100644 filebeat/input/filestream/parsers_integration_test.go

diff --git a/filebeat/input/filestream/config.go b/filebeat/input/filestream/config.go
index e460e2627e0..cf41c97d080 100644
--- a/filebeat/input/filestream/config.go
+++ b/filebeat/input/filestream/config.go
@@ -70,7 +70,7 @@ type readerConfig struct {
 	MaxBytes       int                       `config:"message_max_bytes" validate:"min=0,nonzero"`
 	Tail           bool                      `config:"seek_to_tail"`
-	Parsers        []*common.ConfigNamespace `config:"parsers"` // TODO multiline, json, syslog?
+ Parsers []common.ConfigNamespace `config:"parsers"` } type backoffConfig struct { @@ -115,7 +115,7 @@ func defaultReaderConfig() readerConfig { LineTerminator: readfile.AutoLineTerminator, MaxBytes: 10 * humanize.MiByte, Tail: false, - Parsers: nil, + Parsers: make([]common.ConfigNamespace, 0), } } @@ -123,26 +123,10 @@ func (c *config) Validate() error { if len(c.Paths) == 0 { return fmt.Errorf("no path is configured") } - // TODO - //if c.CleanInactive != 0 && c.IgnoreOlder == 0 { - // return fmt.Errorf("ignore_older must be enabled when clean_inactive is used") - //} - - // TODO - //if c.CleanInactive != 0 && c.CleanInactive <= c.IgnoreOlder+c.ScanFrequency { - // return fmt.Errorf("clean_inactive must be > ignore_older + scan_frequency to make sure only files which are not monitored anymore are removed") - //} - - // TODO - //if c.JSON != nil && len(c.JSON.MessageKey) == 0 && - // c.Multiline != nil { - // return fmt.Errorf("When using the JSON decoder and multiline together, you need to specify a message_key value") - //} - - //if c.JSON != nil && len(c.JSON.MessageKey) == 0 && - // (len(c.IncludeLines) > 0 || len(c.ExcludeLines) > 0) { - // return fmt.Errorf("When using the JSON decoder and line filtering together, you need to specify a message_key value") - //} + + if err := validateParserConfig(parserConfig{maxBytes: c.Reader.MaxBytes, lineTerminator: c.Reader.LineTerminator}, c.Reader.Parsers); err != nil { + return fmt.Errorf("cannot parse parser configuration: %+v", err) + } return nil } diff --git a/filebeat/input/filestream/environment_test.go b/filebeat/input/filestream/environment_test.go index f337bc06411..0811fa93405 100644 --- a/filebeat/input/filestream/environment_test.go +++ b/filebeat/input/filestream/environment_test.go @@ -345,6 +345,39 @@ func (e *inputTestingEnvironment) getOutputMessages() []string { return messages } +func (e *inputTestingEnvironment) requireEventContents(nr int, key, value string) { + events := make([]beat.Event, 0) + for _, c := range e.pipeline.clients { + for _, evt := range c.GetEvents() { + events = append(events, evt) + } + } + + selectedEvent := events[nr] + v, err := selectedEvent.Fields.GetValue(key) + if err != nil { + e.t.Fatalf("cannot find key %s in event %+v", key, selectedEvent) + } + + val, ok := v.(string) + if !ok { + e.t.Fatalf("value is not string %+v", v) + } + require.Equal(e.t, value, val) +} + +func (e *inputTestingEnvironment) requireEventTimestamp(nr int, ts string) { + events := make([]beat.Event, 0) + for _, c := range e.pipeline.clients { + for _, evt := range c.GetEvents() { + events = append(events, evt) + } + } + + selectedEvent := events[nr] + require.Equal(e.t, ts, selectedEvent.Timestamp.String()) +} + type testInputStore struct { registry *statestore.Registry } diff --git a/filebeat/input/filestream/example_inputs_integration_test.go b/filebeat/input/filestream/example_inputs_integration_test.go new file mode 100644 index 00000000000..1973ef9a6d2 --- /dev/null +++ b/filebeat/input/filestream/example_inputs_integration_test.go @@ -0,0 +1,89 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// +build integration + +package filestream + +const ( + elasticsearchMultilineLogs = `[2015-12-06 01:44:16,735][INFO ][node ] [Zach] version[2.0.0], pid[48553], build[de54438/2015-10-22T08:09:48Z] +[2015-12-06 01:44:16,736][INFO ][node ] [Zach] initializing ... +[2015-12-06 01:44:16,804][INFO ][plugins ] [Zach] loaded [], sites [] +[2015-12-06 01:44:16,941][INFO ][env ] [Zach] using [1] data paths, mounts [[/ (/dev/disk1)]], net usable_space [66.3gb], net total_space [232.6gb], spins? [unknown], types [hfs] +[2015-12-06 01:44:19,177][INFO ][node ] [Zach] initialized +[2015-12-06 01:44:19,177][INFO ][node ] [Zach] starting ... +[2015-12-06 01:44:19,356][INFO ][transport ] [Zach] publish_address {127.0.0.1:9300}, bound_addresses {127.0.0.1:9300}, {[fe80::1]:9300}, {[::1]:9300} +[2015-12-06 01:44:19,367][INFO ][discovery ] [Zach] elasticsearch/qfPw9z0HQe6grbJQruTCJQ +[2015-12-06 01:44:22,405][INFO ][cluster.service ] [Zach] new_master {Zach}{qfPw9z0HQe6grbJQruTCJQ}{127.0.0.1}{127.0.0.1:9300}, reason: zen-disco-join(elected_as_master, [0] joins received) +[2015-12-06 01:44:22,432][INFO ][http ] [Zach] publish_address {127.0.0.1:9200}, bound_addresses {127.0.0.1:9200}, {[fe80::1]:9200}, {[::1]:9200} +[2015-12-06 01:44:22,432][INFO ][node ] [Zach] started +[2015-12-06 01:44:22,446][INFO ][gateway ] [Zach] recovered [0] indices into cluster_state +[2015-12-06 01:44:52,882][INFO ][cluster.metadata ] [Zach] [filebeat-2015.12.06] creating index, cause [auto(bulk api)], templates [], shards [5]/[1], mappings [process, system] +[2015-12-06 01:44:53,256][INFO ][cluster.metadata ] [Zach] [filebeat-2015.12.06] update_mapping [process] +[2015-12-06 01:44:53,269][DEBUG][action.admin.indices.mapping.put] [Zach] failed to put mappings on indices [[filebeat-2015.12.06]], type [process] +MergeMappingException[Merge failed with failures {[mapper [proc.mem.rss_p] of different type, current_type [long], merged_type [double]]}] + at org.elasticsearch.cluster.metadata.MetaDataMappingService$2.execute(MetaDataMappingService.java:388) + at org.elasticsearch.cluster.service.InternalClusterService$UpdateTask.run(InternalClusterService.java:388) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:225) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:188) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +[2015-12-06 01:44:53,274][DEBUG][action.bulk ] [Zach] [filebeat-2015.12.06][0] failed to execute bulk item (index) index {[filebeat-2015.12.06][process][AVF0v5vcVA0hoJdODMTz], 
source[{"@timestamp":"2015-12-06T00:44:52.448Z","beat":{"hostname":"ruflin","name":"ruflin"},"count":1,"proc":{"cpu":{"user":1902,"user_p":0,"system":941,"total":2843,"start_time":"Dec03"},"mem":{"size":3616309248,"rss":156405760,"rss_p":0.01,"share":0},"name":"Google Chrome H","pid":40572,"ppid":392,"state":"running"},"type":"process"}]} +MergeMappingException[Merge failed with failures {[mapper [proc.mem.rss_p] of different type, current_type [long], merged_type [double]]}] + at org.elasticsearch.cluster.metadata.MetaDataMappingService$2.execute(MetaDataMappingService.java:388) + at org.elasticsearch.cluster.service.InternalClusterService$UpdateTask.run(InternalClusterService.java:388) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:225) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:188) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +[2015-12-06 01:44:53,279][DEBUG][action.admin.indices.mapping.put] [Zach] failed to put mappings on indices [[filebeat-2015.12.06]], type [process] +MergeMappingException[Merge failed with failures {[mapper [proc.mem.rss_p] of different type, current_type [long], merged_type [double], mapper [proc.cpu.user_p] of different type, current_type [long], merged_type [double]]}] + at org.elasticsearch.cluster.metadata.MetaDataMappingService$2.execute(MetaDataMappingService.java:388) + at org.elasticsearch.cluster.service.InternalClusterService$UpdateTask.run(InternalClusterService.java:388) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:225) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:188) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +[2015-12-06 01:44:53,280][DEBUG][action.bulk ] [Zach] [filebeat-2015.12.06][1] failed to execute bulk item (index) index {[filebeat-2015.12.06][process][AVF0v5vbVA0hoJdODMTj], source[{"@timestamp":"2015-12-06T00:44:52.416Z","beat":{"hostname":"ruflin","name":"ruflin"},"count":1,"proc":{"cpu":{"user":6643,"user_p":0.01,"system":693,"total":7336,"start_time":"01:44"},"mem":{"size":5182656512,"rss":248872960,"rss_p":0.01,"share":0},"name":"java","pid":48553,"ppid":48547,"state":"running"},"type":"process"}]} +MergeMappingException[Merge failed with failures {[mapper [proc.mem.rss_p] of different type, current_type [long], merged_type [double], mapper [proc.cpu.user_p] of different type, current_type [long], merged_type [double]]}] + at org.elasticsearch.cluster.metadata.MetaDataMappingService$2.execute(MetaDataMappingService.java:388) + at org.elasticsearch.cluster.service.InternalClusterService$UpdateTask.run(InternalClusterService.java:388) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:225) + at 
org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:188) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +[2015-12-06 01:44:53,334][INFO ][cluster.metadata ] [Zach] [filebeat-2015.12.06] update_mapping [system] +[2015-12-06 01:44:53,646][INFO ][cluster.metadata ] [Zach] [filebeat-2015.12.06] create_mapping [filesystem] +` + + elasticsearchMultilineLongLogs = `[2015-12-06 01:44:16,735][INFO ][node ] [Zach] version[2.0.0], pid[48553], build[de54438/2015-10-22T08:09:48Z] +[2015-12-06 01:44:53,269][DEBUG][action.admin.indices.mapping.put] [Zach] failed to put mappings on indices [[filebeat-2015.12.06]], type [process] +MergeMappingException[Merge failed with failures {[mapper [proc.mem.rss_p] of different type, current_type [long], merged_type [double]]}] + at org.elasticsearch.cluster.metadata.MetaDataMappingService$2.execute(MetaDataMappingService.java:388) + at org.elasticsearch.cluster.service.InternalClusterService$UpdateTask.run(InternalClusterService.java:388) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:225) + at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:188) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +[2015-12-06 01:44:53,646][INFO ][cluster.metadata ] [Zach] [filebeat-2015.12.06] create_mapping [filesystem] +` +) diff --git a/filebeat/input/filestream/input.go b/filebeat/input/filestream/input.go index 1a093602fb8..1e1298ee1a6 100644 --- a/filebeat/input/filestream/input.go +++ b/filebeat/input/filestream/input.go @@ -57,6 +57,8 @@ type filestream struct { encodingFactory encoding.EncodingFactory encoding encoding.Encoding closerConfig closerConfig + parserConfig []common.ConfigNamespace + msgPostProc []postProcesser } // Plugin creates a new filestream input plugin for creating a stateful input. 
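The `parsers` section of the input configuration is unpacked into `readerConfig.Parsers` (a `[]common.ConfigNamespace`) and then compiled into a reader chain by `newParsers`, which is what `open()` does in the hunk below. A minimal sketch of that flow, reusing helpers introduced in this patch (`defaultConfig`, `newParsers`, `parserConfig`, and the `testReader` helper from `parser_test.go`); the concrete configuration values are illustrative only:

```go
package filestream

import (
	"testing"

	"github.com/elastic/beats/v7/libbeat/common"
	"github.com/elastic/beats/v7/libbeat/reader/readfile"
)

// TestParsersConfigFlowSketch shows how a "parsers" section is collected into
// cfg.Reader.Parsers and wired around an inner reader, mirroring open().
func TestParsersConfigFlowSketch(t *testing.T) {
	raw := common.MustNewConfigFrom(map[string]interface{}{
		"paths": []string{"dummy_path"},
		"parsers": []map[string]interface{}{
			{"multiline": map[string]interface{}{"type": "count", "count_lines": 2}},
		},
	})

	cfg := defaultConfig()
	// Unpack also runs Validate(), which calls validateParserConfig on the list.
	if err := raw.Unpack(&cfg); err != nil {
		t.Fatal(err)
	}
	if len(cfg.Reader.Parsers) != 1 {
		t.Fatalf("expected 1 parser namespace, got %d", len(cfg.Reader.Parsers))
	}

	// open() performs the equivalent wiring around the real file reader.
	p, err := newParsers(
		testReader("line 1\nline 2\nline 3\nline 4\n"),
		parserConfig{lineTerminator: readfile.AutoLineTerminator, maxBytes: 1024},
		cfg.Reader.Parsers,
	)
	if err != nil {
		t.Fatal(err)
	}

	// The first message should aggregate "line 1" and "line 2"; an ndjson
	// entry could be appended to the parsers list as in the commit message.
	msg, err := p.Next()
	if err != nil {
		t.Fatal(err)
	}
	_ = msg
}
```

The same `[]common.ConfigNamespace` slice also feeds `newPostProcessors`, so a single `parsers` list drives both the reader chain and the per-message post-processing step used in `eventFromMessage`.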
@@ -216,8 +218,16 @@ func (inp *filestream) open(log *logp.Logger, canceler input.Canceler, path stri } r = readfile.NewStripNewline(r, inp.readerConfig.LineTerminator) + + r, err = newParsers(r, parserConfig{maxBytes: inp.readerConfig.MaxBytes, lineTerminator: inp.readerConfig.LineTerminator}, inp.readerConfig.Parsers) + if err != nil { + return nil, err + } + r = readfile.NewLimitReader(r, inp.readerConfig.MaxBytes) + inp.msgPostProc = newPostProcessors(inp.readerConfig.Parsers) + return r, nil } @@ -369,16 +379,24 @@ func (inp *filestream) eventFromMessage(m reader.Message, path string) beat.Even }, } fields.DeepUpdate(m.Fields) + m.Fields = fields + + for _, proc := range inp.msgPostProc { + proc.PostProcess(&m) + } if len(m.Content) > 0 { - if fields == nil { - fields = common.MapStr{} + if m.Fields == nil { + m.Fields = common.MapStr{} + } + if _, ok := m.Fields["message"]; !ok { + m.Fields["message"] = string(m.Content) } - fields["message"] = string(m.Content) } return beat.Event{ Timestamp: m.Ts, - Fields: fields, + Meta: m.Meta, + Fields: m.Fields, } } diff --git a/filebeat/input/filestream/parser.go b/filebeat/input/filestream/parser.go new file mode 100644 index 00000000000..70180b4a452 --- /dev/null +++ b/filebeat/input/filestream/parser.go @@ -0,0 +1,140 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package filestream + +import ( + "errors" + "fmt" + "io" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/reader" + "github.com/elastic/beats/v7/libbeat/reader/multiline" + "github.com/elastic/beats/v7/libbeat/reader/readfile" + "github.com/elastic/beats/v7/libbeat/reader/readjson" +) + +var ( + ErrNoSuchParser = errors.New("no such parser") +) + +// parser transforms or translates the Content attribute of a Message. +// They are able to aggregate two or more Messages into a single one. 
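+//
+// A hypothetical minimal implementation (for illustration only, not used by
+// this change) would simply wrap another reader:
+//
+//	type passthroughParser struct{ r reader.Reader }
+//
+//	func (p passthroughParser) Next() (reader.Message, error) { return p.r.Next() }
+//	func (p passthroughParser) Close() error                  { return p.r.Close() }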
+type parser interface { + io.Closer + Next() (reader.Message, error) +} + +type parserConfig struct { + maxBytes int + lineTerminator readfile.LineTerminator +} + +type postProcesser interface { + PostProcess(*reader.Message) + Name() string +} + +func newParsers(in reader.Reader, pCfg parserConfig, c []common.ConfigNamespace) (parser, error) { + p := in + + parserCheck := make(map[string]int) + for _, ns := range c { + name := ns.Name() + switch name { + case "multiline": + parserCheck["multiline"]++ + var config multiline.Config + cfg := ns.Config() + err := cfg.Unpack(&config) + if err != nil { + return nil, fmt.Errorf("error while parsing multiline parser config: %+v", err) + } + p, err = multiline.New(p, "\n", pCfg.maxBytes, &config) + if err != nil { + return nil, fmt.Errorf("error while creating multiline parser: %+v", err) + } + case "ndjson": + parserCheck["ndjson"]++ + var config readjson.Config + cfg := ns.Config() + err := cfg.Unpack(&config) + if err != nil { + return nil, fmt.Errorf("error while parsing ndjson parser config: %+v", err) + } + p = readjson.NewJSONReader(p, &config) + default: + return nil, fmt.Errorf("%s: %s", ErrNoSuchParser, name) + } + } + + // This is a temporary check. In the long run configuring multiple parsers with the same + // type is going to be supported. + if count, ok := parserCheck["multiline"]; ok && count > 1 { + return nil, fmt.Errorf("only one parser is allowed for multiline, got %d", count) + } + if count, ok := parserCheck["ndjson"]; ok && count > 1 { + return nil, fmt.Errorf("only one parser is allowed for ndjson, got %d", count) + } + + return p, nil +} + +func newPostProcessors(c []common.ConfigNamespace) []postProcesser { + var pp []postProcesser + for _, ns := range c { + name := ns.Name() + switch name { + case "ndjson": + var config readjson.Config + cfg := ns.Config() + cfg.Unpack(&config) + pp = append(pp, readjson.NewJSONPostProcessor(&config)) + default: + continue + } + } + + return pp +} + +func validateParserConfig(pCfg parserConfig, c []common.ConfigNamespace) error { + for _, ns := range c { + name := ns.Name() + switch name { + case "multiline": + var config multiline.Config + cfg := ns.Config() + err := cfg.Unpack(&config) + if err != nil { + return fmt.Errorf("error while parsing multiline parser config: %+v", err) + } + case "ndjson": + var config readjson.Config + cfg := ns.Config() + err := cfg.Unpack(&config) + if err != nil { + return fmt.Errorf("error while parsing ndjson parser config: %+v", err) + } + default: + return fmt.Errorf("%s: %s", ErrNoSuchParser, name) + } + } + + return nil +} diff --git a/filebeat/input/filestream/parser_test.go b/filebeat/input/filestream/parser_test.go new file mode 100644 index 00000000000..a312012eaf3 --- /dev/null +++ b/filebeat/input/filestream/parser_test.go @@ -0,0 +1,284 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package filestream + +import ( + "io/ioutil" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/reader" + "github.com/elastic/beats/v7/libbeat/reader/multiline" + "github.com/elastic/beats/v7/libbeat/reader/readfile" + "github.com/elastic/beats/v7/libbeat/reader/readfile/encoding" +) + +func TestParsersConfigAndReading(t *testing.T) { + tests := map[string]struct { + lines string + parsers map[string]interface{} + expectedMessages []string + expectedError string + }{ + "no parser, no error": { + lines: "line 1\nline 2\n", + parsers: map[string]interface{}{ + "paths": []string{"dummy_path"}, + }, + expectedMessages: []string{"line 1\n", "line 2\n"}, + }, + "correct multiline parser": { + lines: "line 1.1\nline 1.2\nline 1.3\nline 2.1\nline 2.2\nline 2.3\n", + parsers: map[string]interface{}{ + "paths": []string{"dummy_path"}, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "count", + "count_lines": 3, + }, + }, + }, + }, + expectedMessages: []string{ + "line 1.1\n\nline 1.2\n\nline 1.3\n", + "line 2.1\n\nline 2.2\n\nline 2.3\n", + }, + }, + "multiline docker logs parser": { + lines: `{"log":"[log] The following are log messages\n","stream":"stdout","time":"2016-03-02T22:58:51.338462311Z"} +{"log":"[log] This one is\n","stream":"stdout","time":"2016-03-02T22:58:51.338462311Z"} +{"log":" on multiple\n","stream":"stdout","time":"2016-03-02T22:58:51.338462311Z"} +{"log":" lines","stream":"stdout","time":"2016-03-02T22:58:51.338462311Z"} +{"log":"[log] In total there should be 3 events\n","stream":"stdout","time":"2016-03-02T22:58:51.338462311Z"} +`, + parsers: map[string]interface{}{ + "paths": []string{"dummy_path"}, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "keys_under_root": true, + "message_key": "log", + }, + }, + map[string]interface{}{ + "multiline": map[string]interface{}{ + "match": "after", + "negate": true, + "pattern": "^\\[log\\]", + }, + }, + }, + }, + expectedMessages: []string{ + "[log] The following are log messages\n", + "[log] This one is\n\n on multiple\n\n lines", + "[log] In total there should be 3 events\n", + }, + }, + "non existent parser configuration": { + parsers: map[string]interface{}{ + "paths": []string{"dummy_path"}, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "no_such_parser": nil, + }, + }, + }, + expectedError: ErrNoSuchParser.Error(), + }, + "invalid multiline parser configuration is caught before parser creation": { + parsers: map[string]interface{}{ + "paths": []string{"dummy_path"}, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "match": "after", + }, + }, + }, + }, + expectedError: multiline.ErrMissingPattern.Error(), + }, + } + + for name, test := range tests { + test := test + t.Run(name, func(t *testing.T) { + cfg := defaultConfig() + parsersConfig := common.MustNewConfigFrom(test.parsers) + err := parsersConfig.Unpack(&cfg) + if test.expectedError == "" { + require.NoError(t, err) + } else { + require.Contains(t, err.Error(), test.expectedError) + return + } + + p, err := newParsers(testReader(test.lines), parserConfig{lineTerminator: readfile.AutoLineTerminator, maxBytes: 64}, 
cfg.Reader.Parsers) + + i := 0 + msg, err := p.Next() + for err == nil { + require.Equal(t, test.expectedMessages[i], string(msg.Content)) + i++ + msg, err = p.Next() + } + }) + } +} + +func TestPostProcessor(t *testing.T) { + tests := map[string]struct { + message reader.Message + postProcessors map[string]interface{} + expectedMessage reader.Message + }{ + "no postprocesser, no processing": { + message: reader.Message{ + Content: []byte("line 1"), + }, + postProcessors: map[string]interface{}{ + "paths": []string{"dummy_path"}, + }, + expectedMessage: reader.Message{ + Content: []byte("line 1"), + }, + }, + "JSON post processer with keys_under_root": { + message: reader.Message{ + Fields: common.MapStr{ + "json": common.MapStr{ + "key": "value", + }, + }, + }, + postProcessors: map[string]interface{}{ + "paths": []string{"dummy_path"}, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "keys_under_root": true, + }, + }, + }, + }, + expectedMessage: reader.Message{ + Fields: common.MapStr{ + "key": "value", + }, + }, + }, + "JSON post processer with document ID": { + message: reader.Message{ + Fields: common.MapStr{ + "json": common.MapStr{ + "key": "value", + "my-id-field": "my-id", + }, + }, + }, + postProcessors: map[string]interface{}{ + "paths": []string{"dummy_path"}, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "keys_under_root": true, + "document_id": "my-id-field", + }, + }, + }, + }, + expectedMessage: reader.Message{ + Fields: common.MapStr{ + "key": "value", + }, + Meta: common.MapStr{ + "_id": "my-id", + }, + }, + }, + "JSON post processer with overwrite keys and under root": { + message: reader.Message{ + Fields: common.MapStr{ + "json": common.MapStr{ + "key": "value", + }, + "key": "another-value", + "other-key": "other-value", + }, + }, + postProcessors: map[string]interface{}{ + "paths": []string{"dummy_path"}, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "keys_under_root": true, + "overwrite_keys": true, + }, + }, + }, + }, + expectedMessage: reader.Message{ + Fields: common.MapStr{ + "key": "value", + "other-key": "other-value", + }, + }, + }, + } + + for name, test := range tests { + test := test + t.Run(name, func(t *testing.T) { + cfg := defaultConfig() + common.MustNewConfigFrom(test.postProcessors).Unpack(&cfg) + pp := newPostProcessors(cfg.Reader.Parsers) + + msg := test.message + for _, p := range pp { + p.PostProcess(&msg) + } + require.Equal(t, test.expectedMessage, msg) + }) + } + +} + +func testReader(lines string) reader.Reader { + encF, _ := encoding.FindEncoding("") + reader := strings.NewReader(lines) + enc, err := encF(reader) + if err != nil { + panic(err) + } + r, err := readfile.NewEncodeReader(ioutil.NopCloser(reader), readfile.Config{ + Codec: enc, + BufferSize: 1024, + Terminator: readfile.AutoLineTerminator, + MaxBytes: 1024, + }) + if err != nil { + panic(err) + } + + return r +} diff --git a/filebeat/input/filestream/parsers_integration_test.go b/filebeat/input/filestream/parsers_integration_test.go new file mode 100644 index 00000000000..3b4d27ffedb --- /dev/null +++ b/filebeat/input/filestream/parsers_integration_test.go @@ -0,0 +1,552 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. 
licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// +build integration + +package filestream + +import ( + "context" + "testing" +) + +// test_docker_logs from test_json.py +func TestParsersDockerLogs(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "message_key": "log", + }, + }, + }, + }) + + testline := []byte("{\"log\":\"Fetching main repository github.com/elastic/beats...\\n\",\"stream\":\"stdout\",\"time\":\"2016-03-02T22:58:51.338462311Z\"}\n") + env.mustWriteLinesToFile(testlogName, testline) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(1) + env.requireOffsetInRegistry(testlogName, len(testline)) + + env.requireEventContents(0, "json.log", "Fetching main repository github.com/elastic/beats...\n") + env.requireEventContents(0, "json.time", "2016-03-02T22:58:51.338462311Z") + env.requireEventContents(0, "json.stream", "stdout") + + cancelInput() + env.waitUntilInputStops() +} + +// test_docker_logs_filtering from test_json.py +func TestParsersDockerLogsFiltering(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "message_key": "log", + "keys_under_root": true, + }, + }, + }, + "exclude_lines": []string{"main"}, + }) + + testline := []byte(`{"log":"Fetching main repository github.com/elastic/beats...\n","stream":"stdout","time":"2016-03-02T22:58:51.338462311Z"} +{"log":"Fetching dependencies...\n","stream":"stdout","time":"2016-03-02T22:59:04.609292428Z"} +{"log":"Execute /scripts/packetbeat_before_build.sh\n","stream":"stdout","time":"2016-03-02T22:59:04.617434682Z"} +`) + env.mustWriteLinesToFile(testlogName, testline) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(2) + env.requireOffsetInRegistry(testlogName, len(testline)) + + env.requireEventContents(0, "time", "2016-03-02T22:59:04.609292428Z") + env.requireEventContents(0, "stream", "stdout") + + cancelInput() + env.waitUntilInputStops() +} + +// test_simple_json_overwrite from test_json.py +func TestParsersSimpleJSONOverwrite(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "message_key": 
"message", + "keys_under_root": true, + "overwrite_keys": true, + }, + }, + }, + }) + + testline := []byte("{\"source\": \"hello\", \"message\": \"test source\"}\n") + env.mustWriteLinesToFile(testlogName, testline) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(1) + env.requireOffsetInRegistry(testlogName, len(testline)) + + env.requireEventContents(0, "source", "hello") + env.requireEventContents(0, "message", "test source") + + cancelInput() + env.waitUntilInputStops() +} + +// test_timestamp_in_message from test_json.py +func TestParsersTimestampInJSONMessage(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "ndjson": map[string]interface{}{ + "keys_under_root": true, + "overwrite_keys": true, + "add_error_key": true, + }, + }, + }, + }) + + testline := []byte(`{"@timestamp":"2016-04-05T18:47:18.444Z"} +{"@timestamp":"invalid"} +{"@timestamp":{"hello": "test"}} +`) + + env.mustWriteLinesToFile(testlogName, testline) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(3) + env.requireOffsetInRegistry(testlogName, len(testline)) + + env.requireEventTimestamp(0, "2016-04-05 18:47:18.444 +0000 UTC") + env.requireEventContents(1, "error.message", "@timestamp not overwritten (parse error on invalid)") + env.requireEventContents(2, "error.message", "@timestamp not overwritten (not string)") + + cancelInput() + env.waitUntilInputStops() +} + +// test_java_elasticsearch_log from test_multiline.py +func TestParsersJavaElasticsearchLogs(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "^\\[", + "negate": true, + "match": "after", + "timeout": "100ms", // set to lower value to speed up test + }, + }, + }, + }) + + testlines := []byte(elasticsearchMultilineLogs) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(20) + env.requireOffsetInRegistry(testlogName, len(testlines)) + + cancelInput() + env.waitUntilInputStops() +} + +// test_c_style_log from test_multiline.py +func TestParsersCStyleLog(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "\\\\$", + "negate": false, + "match": "before", + "timeout": "100ms", // set to lower value to speed up test + }, + }, + }, + }) + + testlines := []byte(`The following are log messages +This is a C style log\\ +file which is on multiple\\ +lines +In addition it has normal lines +The total should be 4 lines covered +`) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + 
env.startInput(ctx, inp) + + env.waitUntilEventCount(4) + env.requireOffsetInRegistry(testlogName, len(testlines)) + + cancelInput() + env.waitUntilInputStops() +} + +// test_rabbitmq_multiline_log from test_multiline.py +func TestParsersRabbitMQMultilineLog(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "^=[A-Z]+", + "negate": true, + "match": "after", + "timeout": "100ms", // set to lower value to speed up test + }, + }, + }, + }) + + testlines := []byte(`=ERROR REPORT==== 3-Feb-2016::03:10:32 === +connection <0.23893.109>, channel 3 - soft error: +{amqp_error,not_found, + "no queue 'bucket-1' in vhost '/'", + 'queue.declare'} +=ERROR REPORT==== 3-Feb-2016::03:10:32 === +connection <0.23893.109>, channel 3 - soft error: +{amqp_error,not_found, + "no queue 'bucket-1' in vhost '/'", + 'queue.declare'} +`) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(2) + env.requireOffsetInRegistry(testlogName, len(testlines)) + + cancelInput() + env.waitUntilInputStops() +} + +// test_max_lines from test_multiline.py +func TestParsersMultilineMaxLines(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "^\\[", + "negate": true, + "match": "after", + "max_lines": 3, + "timeout": "100ms", // set to lower value to speed up test + }, + }, + }, + }) + + testlines := []byte(elasticsearchMultilineLongLogs) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(3) + env.requireOffsetInRegistry(testlogName, len(testlines)) + + env.requireEventsReceived([]string{ + "[2015-12-06 01:44:16,735][INFO ][node ] [Zach] version[2.0.0], pid[48553], build[de54438/2015-10-22T08:09:48Z]", + `[2015-12-06 01:44:53,269][DEBUG][action.admin.indices.mapping.put] [Zach] failed to put mappings on indices [[filebeat-2015.12.06]], type [process] +MergeMappingException[Merge failed with failures {[mapper [proc.mem.rss_p] of different type, current_type [long], merged_type [double]]}] + at org.elasticsearch.cluster.metadata.MetaDataMappingService$2.execute(MetaDataMappingService.java:388)`, + "[2015-12-06 01:44:53,646][INFO ][cluster.metadata ] [Zach] [filebeat-2015.12.06] create_mapping [filesystem]", + }) + + cancelInput() + env.waitUntilInputStops() +} + +// test_timeout from test_multiline.py +func TestParsersMultilineTimeout(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "^\\[", + "negate": true, + "match": "after", + "max_lines": 3, + "timeout": 
"100ms", // set to lower value to speed up test + }, + }, + }, + }) + + testlines := []byte(`[2015] hello world + First Line + Second Line +`) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(1) + env.requireOffsetInRegistry(testlogName, len(testlines)) + + moreLines := []byte(` This should not be third + This should not be fourth +[2016] Hello world + First line again +`) + + env.mustAppendLinesToFile(testlogName, moreLines) + + env.requireEventsReceived([]string{ + `[2015] hello world + First Line + Second Line`, + }) + + env.waitUntilEventCount(3) + env.requireOffsetInRegistry(testlogName, len(testlines)+len(moreLines)) + env.requireEventsReceived([]string{`[2015] hello world + First Line + Second Line`, + ` This should not be third + This should not be fourth`, + `[2016] Hello world + First line again`, + }) + + cancelInput() + env.waitUntilInputStops() +} + +// test_max_bytes from test_multiline.py +func TestParsersMultilineMaxBytes(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "message_max_bytes": 50, + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "^\\[", + "negate": true, + "match": "after", + "timeout": "100ms", // set to lower value to speed up test + }, + }, + }, + }) + + testlines := []byte(elasticsearchMultilineLongLogs) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(3) + env.requireOffsetInRegistry(testlogName, len(testlines)) + + env.requireEventsReceived([]string{ + "[2015-12-06 01:44:16,735][INFO ][node ", + "[2015-12-06 01:44:53,269][DEBUG][action.admin.indi", + "[2015-12-06 01:44:53,646][INFO ][cluster.metadata ", + }) + + cancelInput() + env.waitUntilInputStops() +} + +// test_close_timeout_with_multiline from test_multiline.py +func TestParsersCloseTimeoutWithMultiline(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "close.reader.after_interval": "100ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "^\\[", + "negate": true, + "match": "after", + }, + }, + }, + }) + + testlines := []byte(`[2015] hello world + First Line + Second Line +`) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(1) + env.requireOffsetInRegistry(testlogName, len(testlines)) + env.waitUntilHarvesterIsDone() + + moreLines := []byte(` This should not be third + This should not be fourth +[2016] Hello world + First line again +`) + + env.mustAppendLinesToFile(testlogName, moreLines) + + env.requireEventsReceived([]string{ + `[2015] hello world + First Line + Second Line`, + }) + + env.waitUntilEventCount(3) + env.requireOffsetInRegistry(testlogName, len(testlines)+len(moreLines)) + env.requireEventsReceived([]string{`[2015] hello world + First Line + Second Line`, + ` This should not be third + 
This should not be fourth`, + `[2016] Hello world + First line again`, + }) + + cancelInput() + env.waitUntilInputStops() +} + +// test_consecutive_newline from test_multiline.py +func TestParsersConsecutiveNewline(t *testing.T) { + env := newInputTestingEnvironment(t) + + testlogName := "test.log" + inp := env.mustCreateInput(map[string]interface{}{ + "paths": []string{env.abspath(testlogName)}, + "prospector.scanner.check_interval": "1ms", + "parsers": []map[string]interface{}{ + map[string]interface{}{ + "multiline": map[string]interface{}{ + "type": "pattern", + "pattern": "^\\[", + "negate": true, + "match": "after", + "timeout": "100ms", // set to lower value to speed up test + }, + }, + }, + }) + + line1 := `[2016-09-02 19:54:23 +0000] Started 2016-09-02 19:54:23 +0000 "GET" for /gaq?path=%2FCA%2FFallbrook%2F1845-Acacia-Ln&referer=http%3A%2F%2Fwww.xxxxx.com%2FAcacia%2BLn%2BFallbrook%2BCA%2Baddresses&search_bucket=none&page_controller=v9%2Faddresses&page_action=show at 23.235.47.31 +X-Forwarded-For:72.197.227.93, 23.235.47.31 +Processing by GoogleAnalyticsController#index as JSON + + Parameters: {"path"=>"/CA/Fallbrook/1845-Acacia-Ln", "referer"=>"http://www.xxxx.com/Acacia+Ln+Fallbrook+CA+addresses", "search_bucket"=>"none", "page_controller"=>"v9/addresses", "page_action"=>"show"} +Completed 200 OK in 5ms (Views: 1.9ms) +` + line2 := `[2016-09-02 19:54:23 +0000] Started 2016-09-02 19:54:23 +0000 "GET" for /health_check at xxx.xx.44.181 +X-Forwarded-For: +SetAdCodeMiddleware.default_ad_code referer +SetAdCodeMiddleware.default_ad_code path /health_check +SetAdCodeMiddleware.default_ad_code route +` + testlines := append([]byte(line1), []byte(line2)...) + env.mustWriteLinesToFile(testlogName, testlines) + + ctx, cancelInput := context.WithCancel(context.Background()) + env.startInput(ctx, inp) + + env.waitUntilEventCount(2) + env.requireOffsetInRegistry(testlogName, len(testlines)) + + env.requireEventsReceived([]string{ + line1[:len(line1)-1], + line2[:len(line2)-1], + }) + + cancelInput() + env.waitUntilInputStops() +} diff --git a/libbeat/reader/message.go b/libbeat/reader/message.go index 344eacd54df..5798c3a9869 100644 --- a/libbeat/reader/message.go +++ b/libbeat/reader/message.go @@ -30,6 +30,7 @@ type Message struct { Content []byte // actual content read Bytes int // total number of bytes read to generate the message Fields common.MapStr // optional fields that can be added by reader + Meta common.MapStr } // IsEmpty returns true in case the message is empty diff --git a/libbeat/reader/multiline/multiline_config.go b/libbeat/reader/multiline/multiline_config.go index d8b63b107e3..5b55b60a2df 100644 --- a/libbeat/reader/multiline/multiline_config.go +++ b/libbeat/reader/multiline/multiline_config.go @@ -80,6 +80,8 @@ func (c *Config) Validate() error { if c.Pattern == nil { return ErrMissingPattern } + } else { + return fmt.Errorf("unknown multiline type %d", c.Type) } return nil } diff --git a/libbeat/reader/readjson/json.go b/libbeat/reader/readjson/json.go index bbbbdeb3ade..370f46fdea4 100644 --- a/libbeat/reader/readjson/json.go +++ b/libbeat/reader/readjson/json.go @@ -37,6 +37,10 @@ type JSONReader struct { logger *logp.Logger } +type JSONPostProcessor struct { + cfg *Config +} + // NewJSONReader creates a new reader that can decode JSON. 
 func NewJSONReader(r reader.Reader, cfg *Config) *JSONReader {
 	return &JSONReader{
@@ -46,6 +50,10 @@ func NewJSONReader(r reader.Reader, cfg *Config) *JSONReader {
 	}
 }
 
+func NewJSONPostProcessor(cfg *Config) *JSONPostProcessor {
+	return &JSONPostProcessor{cfg}
+}
+
 // decodeJSON unmarshals the text parameter into a MapStr and
 // returns the new text column if one was requested.
 func (r *JSONReader) decode(text []byte) ([]byte, common.MapStr) {
@@ -119,6 +127,69 @@ func createJSONError(message string) common.MapStr {
 	return common.MapStr{"message": message, "type": "json"}
 }
 
+func (pp *JSONPostProcessor) Name() string {
+	return "json"
+}
+
+func (pp *JSONPostProcessor) PostProcess(msg *reader.Message) {
+	jsonFields, ok := msg.Fields[pp.Name()].(common.MapStr)
+	if !ok {
+		return
+	}
+
+	// The message key might have been modified by multiline
+	if len(pp.cfg.MessageKey) > 0 && len(msg.Content) > 0 {
+		jsonFields[pp.cfg.MessageKey] = string(msg.Content)
+	}
+
+	// Handle the case in which pp.cfg.AddErrorKey is set and len(jsonFields) == 1,
+	// i.e. the only key it contains is `error` because JSON decoding failed,
+	// which would otherwise lose the message key in the resulting beat event.
+	if len(jsonFields) == 1 && jsonFields["error"] != nil {
+		msg.Fields["message"] = string(msg.Content)
+	}
+
+	var id string
+	if key := pp.cfg.DocumentID; key != "" {
+		if tmp, err := jsonFields.GetValue(key); err == nil {
+			if v, ok := tmp.(string); ok {
+				id = v
+				jsonFields.Delete(key)
+			}
+		}
+	}
+
+	if pp.cfg.KeysUnderRoot {
+		// Delete existing json key
+		delete(msg.Fields, "json")
+
+		var ts time.Time
+		if v, ok := jsonFields["@timestamp"]; ok {
+			switch t := v.(type) {
+			case time.Time:
+				ts = t
+			case common.Time:
+				ts = time.Time(t)
+			}
+			delete(msg.Fields, "@timestamp")
+
+		}
+		event := &beat.Event{
+			Timestamp: ts,
+			Fields:    msg.Fields,
+		}
+		jsontransform.WriteJSONKeys(event, jsonFields, pp.cfg.ExpandKeys, pp.cfg.OverwriteKeys, pp.cfg.AddErrorKey)
+		msg.Ts = event.Timestamp
+	}
+
+	if id != "" {
+		if msg.Meta == nil {
+			msg.Meta = common.MapStr{}
+		}
+		msg.Meta["_id"] = id
+	}
+}
+
 // MergeJSONFields writes the JSON fields in the event map,
 // respecting the KeysUnderRoot, ExpandKeys, and OverwriteKeys configuration options.
 // If MessageKey is defined, the Text value from the event always
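The effect of the new post-processor is easiest to see on a concrete message. A minimal sketch, assuming only the API added above (`NewJSONPostProcessor`, `PostProcess`) and the exported `Config` fields it reads (`KeysUnderRoot`, `DocumentID`); the field values are illustrative:

```go
package main

import (
	"fmt"

	"github.com/elastic/beats/v7/libbeat/common"
	"github.com/elastic/beats/v7/libbeat/reader"
	"github.com/elastic/beats/v7/libbeat/reader/readjson"
)

func main() {
	// A message as produced by the ndjson parser: the decoded object sits
	// under the "json" key of the message fields.
	msg := reader.Message{
		Fields: common.MapStr{
			"json": common.MapStr{"level": "info", "my-id-field": "my-id"},
		},
	}

	pp := readjson.NewJSONPostProcessor(&readjson.Config{
		KeysUnderRoot: true,
		DocumentID:    "my-id-field",
	})
	pp.PostProcess(&msg)

	// msg.Fields: {"level":"info"}  (contents of "json" moved under the root)
	// msg.Meta:   {"_id":"my-id"}   (document ID moved to metadata)
	fmt.Println(msg.Fields, msg.Meta)
}
```

This mirrors the `document_id` case in `parser_test.go`: the decoded object is lifted out of the `json` key and the ID is moved into `Meta`, so outputs can use it as the document `_id`.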