From bbb1394e81605f5e3a806e79ed7f7f9e68c85d6f Mon Sep 17 00:00:00 2001 From: Andrew Kroh Date: Mon, 24 Jan 2022 11:45:53 -0500 Subject: [PATCH] Improve aws-s3 gzip file detection to avoid false negatives Directly check the byte stream for the gzip magic number and deflate compression type. Avoid using http.DetectContentType because it returns the first match it finds while checking many signatures. Closes #29968 --- CHANGELOG.next.asciidoc | 1 + x-pack/filebeat/input/awss3/s3_objects.go | 12 +++--------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 27ca724a2825..b5d8b7724374 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -110,6 +110,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Undo deletion of endpoint config from cloudtrail fileset in {pull}29415[29415]. {pull}29450[29450] - Make Cisco ASA and FTD modules conform to the ECS definition for event.outcome and event.type. {issue}29581[29581] {pull}29698[29698] - ibmmq: Fixed `@timestamp` not being populated with correct values. {pull}29773[29773] +- aws-s3: Improve gzip detection to avoid false negatives. {issue}29968[29968] *Heartbeat* diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 7fe6b193fa45..ebe1a5f0828b 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -15,7 +15,6 @@ import ( "fmt" "io" "io/ioutil" - "net/http" "reflect" "strings" "time" @@ -375,18 +374,13 @@ func s3ObjectHash(obj s3EventV2) string { // stream without consuming it. This makes it convenient for code executed after this function call // to consume the stream if it wants. func isStreamGzipped(r *bufio.Reader) (bool, error) { - // Why 512? See https://godoc.org/net/http#DetectContentType - buf, err := r.Peek(512) + buf, err := r.Peek(3) if err != nil && err != io.EOF { return false, err } - switch http.DetectContentType(buf) { - case "application/x-gzip", "application/zip": - return true, nil - default: - return false, nil - } + // gzip magic number (1f 8b) and the compression method (08 for DEFLATE). + return bytes.HasPrefix(buf, []byte{0x1F, 0x8B, 0x08}), nil } // s3Metadata returns a map containing the selected S3 object metadata keys.