From 4bb8fa33034f5eeae53ed18f811bf64f237a393a Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 26 Feb 2020 11:34:50 -0700 Subject: [PATCH] [Filebeat] Check expand_event_list_from_field before checking content-type (#16441) * Check expand_event_list_from_field before checking content-type --- CHANGELOG.next.asciidoc | 1 + .../docs/inputs/input-aws-s3.asciidoc | 6 ++++- x-pack/filebeat/input/s3/input.go | 23 ++++++++++--------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index cea38cb6824..2056d6664b7 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -77,6 +77,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Prevent Elasticsearch from spewing log warnings about redundant wildcards when setting up ingest pipelines for the `elasticsearch` module. {issue}15840[15840] {pull}15900[15900] - Fix mapping error for cloudtrail additionalEventData field {pull}16088[16088] - Fix a connection error in httpjson input. {pull}16123[16123] +- Fix s3 input with cloudtrail fileset reading json file. {issue}16374[16374] {pull}16441[16441] - Rewrite azure filebeat dashboards, due to changes in kibana. {pull}16466[16466] - Adding the var definitions in azure manifest files, fix for errors when executing command setup. {issue}16270[16270] {pull}16468[16468] - Fix merging of fileset inputs to replace paths and append processors. {pull}16450{16450} diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index 6715b854747..fb84c486a42 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -59,7 +59,11 @@ If the fileset using this input expects to receive multiple messages bundled under a specific field then the config option expand_event_list_from_field value can be assigned the name of the field. 
This setting will be able to split the messages under the group value into separate events. For example, CloudTrail logs -are in JSON format and events are found under the JSON object "Records": +are in JSON format and events are found under the JSON object "Records". + +Note: When the `expand_event_list_from_field` parameter is given in the config, the s3 +input will assume the logs are in JSON format and decode them as JSON. The content +type will not be checked. [float] ==== `api_timeout` diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index c038797a807..9ceba051bf9 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -427,17 +427,6 @@ func (p *s3Input) createEventsFromS3Info(svc s3iface.ClientAPI, info s3Info, s3C defer resp.Body.Close() reader := bufio.NewReader(resp.Body) - // Check content-type - if (resp.ContentType != nil && *resp.ContentType == "application/x-gzip") || strings.HasSuffix(info.key, ".gz") { - gzipReader, err := gzip.NewReader(resp.Body) - if err != nil { - err = errors.Wrap(err, "gzip.NewReader failed") - p.logger.Error(err) - return err - } - reader = bufio.NewReader(gzipReader) - gzipReader.Close() - } // Decode JSON documents when expand_event_list_from_field is given in config if p.config.ExpandEventListFromField != "" { @@ -451,6 +440,18 @@ func (p *s3Input) createEventsFromS3Info(svc s3iface.ClientAPI, info s3Info, s3C return nil } + // Check content-type + if (resp.ContentType != nil && *resp.ContentType == "application/x-gzip") || strings.HasSuffix(info.key, ".gz") { + gzipReader, err := gzip.NewReader(resp.Body) + if err != nil { + err = errors.Wrap(err, "gzip.NewReader failed") + p.logger.Error(err) + return err + } + reader = bufio.NewReader(gzipReader) + gzipReader.Close() + } + // handle s3 objects that are not json content-type offset := 0 for {