From f5b281662611608e686ce4848c6a3b1054e9891a Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Fri, 17 Jan 2020 09:14:53 -0600 Subject: [PATCH 01/10] [Heartbeat] Fix excessive memory usage when parsing bodies In 7.5 we introduced a regression where we would allocate a 100MiB buffer per HTTP request. This change uses a growable byte buffer instead. --- heartbeat/monitors/active/http/respbody.go | 34 +++++++++------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/heartbeat/monitors/active/http/respbody.go b/heartbeat/monitors/active/http/respbody.go index 7990b6324e77..15d7212df6e9 100644 --- a/heartbeat/monitors/active/http/respbody.go +++ b/heartbeat/monitors/active/http/respbody.go @@ -18,12 +18,15 @@ package http import ( + "bytes" "crypto/sha256" "encoding/hex" "io" "net/http" "unicode/utf8" + "github.com/docker/go-units" + "github.com/elastic/beats/heartbeat/reason" "github.com/elastic/beats/libbeat/common" ) @@ -31,7 +34,7 @@ import ( // maxBufferBodyBytes sets a hard limit on how much we're willing to buffer for any reason internally. // since we must buffer the whole body for body validators this is effectively a cap on that. // 100MiB out to be enough for everybody. -const maxBufferBodyBytes = 100 * 1024 * 1024 +const maxBufferBodyBytes = 100 * units.MiB func processBody(resp *http.Response, config responseConfig, validator multiValidator) (common.MapStr, reason.Reason) { // Determine how much of the body to actually buffer in memory @@ -94,29 +97,19 @@ func readBody(resp *http.Response, maxSampleBytes int) (bodySample string, bodyS func readPrefixAndHash(body io.ReadCloser, maxPrefixSize int) (respSize int, prefix string, hashStr string, err error) { hash := sha256.New() - // Function to lazily get the body of the response - rawBuf := make([]byte, 1024) - // Buffer to hold the prefix output along with tracking info - prefixBuf := make([]byte, maxPrefixSize) - prefixRemainingBytes := maxPrefixSize - prefixWriteOffset := 0 + var prefixBuf bytes.Buffer + + chunk := make([]byte, 1024) for { - readSize, readErr := body.Read(rawBuf) + readSize, readErr := body.Read(chunk) respSize += readSize - hash.Write(rawBuf[:readSize]) + hash.Write(chunk[:readSize]) + prefixRemainingBytes := maxPrefixSize - prefixBuf.Len() if prefixRemainingBytes > 0 { - if readSize >= prefixRemainingBytes { - copy(prefixBuf[prefixWriteOffset:maxPrefixSize], rawBuf[:prefixRemainingBytes]) - prefixWriteOffset += prefixRemainingBytes - prefixRemainingBytes = 0 - } else { - copy(prefixBuf[prefixWriteOffset:prefixWriteOffset+readSize], rawBuf[:readSize]) - prefixWriteOffset += readSize - prefixRemainingBytes -= readSize - } + prefixBuf.Write(chunk[:readSize]) } if readErr == io.EOF { @@ -129,8 +122,9 @@ func readPrefixAndHash(body io.ReadCloser, maxPrefixSize int) (respSize int, pre } // We discard the body if it is not valid UTF-8 - if utf8.Valid(prefixBuf[:prefixWriteOffset]) { - prefix = string(prefixBuf[:prefixWriteOffset]) + prefixBytes := prefixBuf.Bytes() + if utf8.Valid(prefixBytes) { + prefix = string(prefixBytes) } return respSize, prefix, hex.EncodeToString(hash.Sum(nil)), nil } From 50e4218a43692d195dca77bed7de24486b8582fd Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Fri, 17 Jan 2020 09:20:48 -0600 Subject: [PATCH 02/10] More precise byte accounting --- heartbeat/monitors/active/http/respbody.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/heartbeat/monitors/active/http/respbody.go b/heartbeat/monitors/active/http/respbody.go index 15d7212df6e9..06576b56556d 100644 --- a/heartbeat/monitors/active/http/respbody.go +++ b/heartbeat/monitors/active/http/respbody.go @@ -26,7 +26,6 @@ import ( "unicode/utf8" "github.com/docker/go-units" - "github.com/elastic/beats/heartbeat/reason" "github.com/elastic/beats/libbeat/common" ) @@ -109,7 +108,11 @@ func readPrefixAndHash(body io.ReadCloser, maxPrefixSize int) (respSize int, pre prefixRemainingBytes := maxPrefixSize - prefixBuf.Len() if prefixRemainingBytes > 0 { - prefixBuf.Write(chunk[:readSize]) + if prefixRemainingBytes < readSize { + prefixBuf.Write(chunk[:prefixRemainingBytes]) + } else { + prefixBuf.Write(chunk[:readSize]) + } } if readErr == io.EOF { From 7f044dfc0e7f1cc4a51d1f80dfe5b0564a122524 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Fri, 17 Jan 2020 11:15:13 -0600 Subject: [PATCH 03/10] Stop checking for UTF-8 in body --- heartbeat/monitors/active/http/respbody.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/heartbeat/monitors/active/http/respbody.go b/heartbeat/monitors/active/http/respbody.go index 06576b56556d..05d0615090ed 100644 --- a/heartbeat/monitors/active/http/respbody.go +++ b/heartbeat/monitors/active/http/respbody.go @@ -23,7 +23,6 @@ import ( "encoding/hex" "io" "net/http" - "unicode/utf8" "github.com/docker/go-units" "github.com/elastic/beats/heartbeat/reason" @@ -124,10 +123,5 @@ func readPrefixAndHash(body io.ReadCloser, maxPrefixSize int) (respSize int, pre } } - // We discard the body if it is not valid UTF-8 - prefixBytes := prefixBuf.Bytes() - if utf8.Valid(prefixBytes) { - prefix = string(prefixBytes) - } - return respSize, prefix, hex.EncodeToString(hash.Sum(nil)), nil + return respSize, prefixBuf.String(), hex.EncodeToString(hash.Sum(nil)), nil } From cae4335e1020031a78a1a8634258d53277fdd27f Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Fri, 17 Jan 2020 11:29:20 -0600 Subject: [PATCH 04/10] Switch string builder --- heartbeat/monitors/active/http/respbody.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heartbeat/monitors/active/http/respbody.go b/heartbeat/monitors/active/http/respbody.go index 05d0615090ed..782cf9267651 100644 --- a/heartbeat/monitors/active/http/respbody.go +++ b/heartbeat/monitors/active/http/respbody.go @@ -18,11 +18,11 @@ package http import ( - "bytes" "crypto/sha256" "encoding/hex" "io" "net/http" + "strings" "github.com/docker/go-units" "github.com/elastic/beats/heartbeat/reason" @@ -96,7 +96,7 @@ func readBody(resp *http.Response, maxSampleBytes int) (bodySample string, bodyS func readPrefixAndHash(body io.ReadCloser, maxPrefixSize int) (respSize int, prefix string, hashStr string, err error) { hash := sha256.New() - var prefixBuf bytes.Buffer + var prefixBuf strings.Builder chunk := make([]byte, 1024) for { From 03c5110a6d700ca2f1c7aaecbb45f706b1403b19 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Fri, 17 Jan 2020 13:55:21 -0600 Subject: [PATCH 05/10] gofmt --- heartbeat/monitors/active/http/respbody.go | 1 + 1 file changed, 1 insertion(+) diff --git a/heartbeat/monitors/active/http/respbody.go b/heartbeat/monitors/active/http/respbody.go index 782cf9267651..257fb404c1b3 100644 --- a/heartbeat/monitors/active/http/respbody.go +++ b/heartbeat/monitors/active/http/respbody.go @@ -25,6 +25,7 @@ import ( "strings" "github.com/docker/go-units" + "github.com/elastic/beats/heartbeat/reason" "github.com/elastic/beats/libbeat/common" ) From 16af47d000b755a5d96c11a1db455af07828bd9f Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Tue, 21 Jan 2020 11:41:15 -0600 Subject: [PATCH 06/10] Use io.Copy --- heartbeat/monitors/active/http/respbody.go | 33 +++++++--------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/heartbeat/monitors/active/http/respbody.go b/heartbeat/monitors/active/http/respbody.go index 257fb404c1b3..2bffe9b8b001 100644 --- a/heartbeat/monitors/active/http/respbody.go +++ b/heartbeat/monitors/active/http/respbody.go @@ -99,30 +99,17 @@ func readPrefixAndHash(body io.ReadCloser, maxPrefixSize int) (respSize int, pre var prefixBuf strings.Builder - chunk := make([]byte, 1024) - for { - readSize, readErr := body.Read(chunk) - - respSize += readSize - hash.Write(chunk[:readSize]) - - prefixRemainingBytes := maxPrefixSize - prefixBuf.Len() - if prefixRemainingBytes > 0 { - if prefixRemainingBytes < readSize { - prefixBuf.Write(chunk[:prefixRemainingBytes]) - } else { - prefixBuf.Write(chunk[:readSize]) - } - } - - if readErr == io.EOF { - break - } + n, err := io.Copy(&prefixBuf, io.TeeReader(io.LimitReader(body, int64(maxPrefixSize)), hash)) + if err == nil { + // finish streaming into hash if the body has not been fully consumed yet + var m int64 + m, err = io.Copy(hash, body) + n += m + } - if readErr != nil { - return 0, "", "", readErr - } + if err != nil { + return 0, "", "", err } - return respSize, prefixBuf.String(), hex.EncodeToString(hash.Sum(nil)), nil + return int(n), prefixBuf.String(), hex.EncodeToString(hash.Sum(nil)), nil } From 2ee596b491f42bcad148513770080abe1c1c5d63 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Tue, 21 Jan 2020 22:45:52 -0600 Subject: [PATCH 07/10] Properly handle EOF --- heartbeat/monitors/active/http/respbody.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heartbeat/monitors/active/http/respbody.go b/heartbeat/monitors/active/http/respbody.go index 2bffe9b8b001..b8b95e5b023d 100644 --- a/heartbeat/monitors/active/http/respbody.go +++ b/heartbeat/monitors/active/http/respbody.go @@ -107,7 +107,7 @@ func readPrefixAndHash(body io.ReadCloser, maxPrefixSize int) (respSize int, pre n += m } - if err != nil { + if err != nil && err != io.EOF { return 0, "", "", err } From eb65da3023c838021d74add8bc7d35b279d194ea Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Tue, 21 Jan 2020 22:47:36 -0600 Subject: [PATCH 08/10] Add changelog entry --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 647632f6bfba..36e5dcc920bf 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -59,6 +59,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Changed default user-agent to be `Elastic-Heartbeat/VERSION (PLATFORM_INFO)` as the current default `Go-http-client/1.1` is often blacklisted. {pull}14291[14291] - JSON/Regex checks against HTTP bodies will only consider the first 100MiB of the HTTP body to prevent excessive memory usage. {pull}14223[pull] - Heartbeat now starts monitors scheduled with the '@every X' syntax instantaneously on startup, rather than waiting for the given interval to pass before running them. {pull}14890[14890] +* Fixed excessive memory usage introduced in 7.5 due to over-allocating memory for HTTP checks. {pull}15639[15639] *Journalbeat* From a820f1dd52c82af1f5e8651c15f0ef89bbe0baf3 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Tue, 21 Jan 2020 22:48:42 -0600 Subject: [PATCH 09/10] Fix changelog --- CHANGELOG.next.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 36e5dcc920bf..c694ccb822d3 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -59,7 +59,6 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Changed default user-agent to be `Elastic-Heartbeat/VERSION (PLATFORM_INFO)` as the current default `Go-http-client/1.1` is often blacklisted. {pull}14291[14291] - JSON/Regex checks against HTTP bodies will only consider the first 100MiB of the HTTP body to prevent excessive memory usage. {pull}14223[pull] - Heartbeat now starts monitors scheduled with the '@every X' syntax instantaneously on startup, rather than waiting for the given interval to pass before running them. {pull}14890[14890] -* Fixed excessive memory usage introduced in 7.5 due to over-allocating memory for HTTP checks. {pull}15639[15639] *Journalbeat* @@ -246,6 +245,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix storage of HTTP bodies to work when JSON/Regex body checks are enabled. {pull}14223[14223] - Fix recording of SSL cert metadata for Expired/Unvalidated x509 certs. {pull}13687[13687] - The heartbeat scheduler no longer drops scheduled items when under very high load causing missed deadlines. {pull}14890[14890] +- Fixed excessive memory usage introduced in 7.5 due to over-allocating memory for HTTP checks. {pull}15639[15639] *Journalbeat* From 0b6a5af98ab5e8630083d43a560718958ef84389 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Tue, 21 Jan 2020 22:50:19 -0600 Subject: [PATCH 10/10] Fix changelog merge --- CHANGELOG.next.asciidoc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8b9e65e9b629..667258565191 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -49,12 +49,6 @@ TLS or Beats that accept connections over TLS and validate client certificates. *Heartbeat* -- Fix NPEs / resource leaks when executing config checks. {pull}11165[11165] -- Fix duplicated IPs on `mode: all` monitors. {pull}12458[12458] -- Fix integer comparison on JSON responses. {pull}13348[13348] -- Fix storage of HTTP bodies to work when JSON/Regex body checks are enabled. {pull}14223[14223] -- Fix recording of SSL cert metadata for Expired/Unvalidated x509 certs. {pull}13687[13687] -- The heartbeat scheduler no longer drops scheduled items when under very high load causing missed deadlines. {pull}14890[14890] - Fixed excessive memory usage introduced in 7.5 due to over-allocating memory for HTTP checks. {pull}15639[15639] *Journalbeat*