From a61acd3318604924c51c2f3b1ddab92d8048b1b2 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Thu, 25 Aug 2022 10:49:28 +0200 Subject: [PATCH] [7.17](backport #32767) allow for json/ndjson content type with charset (#32834) * allow for json/ndjson content type with charset (#32767) * allow for json/ndjson content type with charset * add '; charset=UTF-8' in integration tests * changelog * assessing CI * revert correct behaviour (cherry picked from commit b40349ce5ac44a49c65e268419dc17336df1509d) # Conflicts: # x-pack/filebeat/input/awss3/input_integration_test.go * fix backport conflicts Co-authored-by: Andrea Spacca --- CHANGELOG.next.asciidoc | 1 + .../input/awss3/input_integration_test.go | 19 ++++++++++++------- x-pack/filebeat/input/awss3/s3_objects.go | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index e8945a005949..e7994d337686 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -39,6 +39,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d *Filebeat* - Fix file.path field in cloudtrail fileset to use json.digestS3Object. {pull}32759[32759] +- Fix not parsing as json when `json` and `ndjson` content types have charset information in `aws-s3` input {pull}32767[32767] *Heartbeat* diff --git a/x-pack/filebeat/input/awss3/input_integration_test.go b/x-pack/filebeat/input/awss3/input_integration_test.go index 32b61bfcd539..e56da8f3745b 100644 --- a/x-pack/filebeat/input/awss3/input_integration_test.go +++ b/x-pack/filebeat/input/awss3/input_integration_test.go @@ -16,6 +16,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strings" "testing" "time" @@ -86,7 +87,6 @@ file_selectors: - regex: 'events-array.json$' expand_event_list_from_field: Events - content_type: application/json include_s3_metadata: - last-modified - x-amz-version-id @@ -95,7 +95,6 @@ file_selectors: - Content-Type - regex: '\.(?:nd)?json(\.gz)?$' - content_type: application/json - regex: 'multiline.txt$' parsers: @@ -115,7 +114,6 @@ file_selectors: - regex: 'events-array.json$' expand_event_list_from_field: Events - content_type: application/json include_s3_metadata: - last-modified - x-amz-version-id @@ -124,7 +122,6 @@ file_selectors: - Content-Type - regex: '\.(?:nd)?json(\.gz)?$' - content_type: application/json - regex: 'multiline.txt$' parsers: @@ -324,11 +321,19 @@ func uploadS3TestFiles(t *testing.T, region, bucket string, filenames ...string) t.Fatalf("Failed to open file %q, %v", filename, err) } + contentType := "" + if strings.HasSuffix(filename, "ndjson") || strings.HasSuffix(filename, "ndjson.gz") { + contentType = contentTypeNDJSON + "; charset=UTF-8" + } else if strings.HasSuffix(filename, "json") || strings.HasSuffix(filename, "json.gz") { + contentType = contentTypeJSON + "; charset=UTF-8" + } + // Upload the file to S3. result, err := uploader.Upload(&s3manager.UploadInput{ - Bucket: aws.String(bucket), - Key: aws.String(filepath.Base(filename)), - Body: bytes.NewReader(data), + Bucket: aws.String(bucket), + Key: aws.String(filepath.Base(filename)), + Body: bytes.NewReader(data), + ContentType: aws.String(contentType), }) if err != nil { t.Fatalf("Failed to upload file %q: %v", filename, err) diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 9626a84a4adc..ba34d8e516e8 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -151,7 +151,7 @@ func (p *s3ObjectProcessor) ProcessS3Object() error { // Process object content stream. switch { - case contentType == contentTypeJSON || contentType == contentTypeNDJSON: + case strings.HasPrefix(contentType, contentTypeJSON) || strings.HasPrefix(contentType, contentTypeNDJSON): err = p.readJSON(reader) default: err = p.readFile(reader)