From b22e9f556dd2e01f3a82f25c61d9eac2d500c9c9 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Fri, 24 Jul 2020 11:17:30 +0200 Subject: [PATCH 01/15] Convert httpjson input to a v2 cursor input --- .../filebeat/input/default-inputs/inputs.go | 2 + .../filebeat/input/httpjson/config_oauth.go | 4 +- .../filebeat/input/httpjson/httpjson_test.go | 671 +----------------- x-pack/filebeat/input/httpjson/input.go | 615 +++------------- x-pack/filebeat/input/httpjson/requester.go | 459 ++++++++++++ x-pack/filebeat/tests/system/test_httpjson.py | 609 ++++++++++++++++ 6 files changed, 1164 insertions(+), 1196 deletions(-) create mode 100644 x-pack/filebeat/input/httpjson/requester.go create mode 100644 x-pack/filebeat/tests/system/test_httpjson.py diff --git a/x-pack/filebeat/input/default-inputs/inputs.go b/x-pack/filebeat/input/default-inputs/inputs.go index da27367a109..5eade46eafa 100644 --- a/x-pack/filebeat/input/default-inputs/inputs.go +++ b/x-pack/filebeat/input/default-inputs/inputs.go @@ -12,6 +12,7 @@ import ( "github.com/elastic/beats/v7/libbeat/logp" "github.com/elastic/beats/v7/x-pack/filebeat/input/cloudfoundry" "github.com/elastic/beats/v7/x-pack/filebeat/input/http_endpoint" + "github.com/elastic/beats/v7/x-pack/filebeat/input/httpjson" "github.com/elastic/beats/v7/x-pack/filebeat/input/o365audit" ) @@ -27,5 +28,6 @@ func xpackInputs(info beat.Info, log *logp.Logger, store beater.StateStore) []v2 cloudfoundry.Plugin(), http_endpoint.Plugin(), o365audit.Plugin(log, store), + httpjson.Plugin(log, store), } } diff --git a/x-pack/filebeat/input/httpjson/config_oauth.go b/x-pack/filebeat/input/httpjson/config_oauth.go index 6a09cf2fb92..8f2daa06dc5 100644 --- a/x-pack/filebeat/input/httpjson/config_oauth.go +++ b/x-pack/filebeat/input/httpjson/config_oauth.go @@ -66,8 +66,8 @@ func (o *OAuth2) IsEnabled() bool { } // Client wraps the given http.Client and returns a new one that will use the oauth authentication. 
-func (o *OAuth2) Client(ctx context.Context, client *http.Client) (*http.Client, error) { - ctx = context.WithValue(ctx, oauth2.HTTPClient, client) +func (o *OAuth2) Client(client *http.Client) (*http.Client, error) { + ctx := context.WithValue(context.Background(), oauth2.HTTPClient, client) switch o.GetProvider() { case OAuth2ProviderAzure, OAuth2ProviderDefault: diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/httpjson_test.go index a6ebd16ad5d..e9f3af51dad 100644 --- a/x-pack/filebeat/input/httpjson/httpjson_test.go +++ b/x-pack/filebeat/input/httpjson/httpjson_test.go @@ -5,318 +5,15 @@ package httpjson import ( - "context" - "encoding/json" - "fmt" - "io/ioutil" - "log" - "math/rand" "net/http" - "net/http/httptest" - "reflect" "regexp" "strconv" - "sync" "testing" "time" - "golang.org/x/sync/errgroup" - - "github.com/stretchr/testify/assert" - - "github.com/elastic/beats/v7/filebeat/channel" - "github.com/elastic/beats/v7/filebeat/input" - "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/common" - "github.com/elastic/beats/v7/libbeat/logp" -) - -const ( - HTTPTestServer int = iota - TLSTestServer - RateLimitRetryServer - ErrorRetryServer - ArrayResponseServer -) - -var ( - once sync.Once ) -func testSetup(t *testing.T) { - t.Helper() - once.Do(func() { - logp.TestingSetup() - }) -} - -func createTestServer(testServer int) *httptest.Server { - var ts *httptest.Server - newServer := httptest.NewServer - switch testServer { - case HTTPTestServer: - ts = createServer(newServer) - case TLSTestServer: - ts = createServer(httptest.NewTLSServer) - case RateLimitRetryServer: - ts = createCustomServer(newServer) - case ErrorRetryServer: - ts = createCustomRetryServer(newServer) - case ArrayResponseServer: - ts = createCustomServerWithArrayResponse(newServer) - default: - ts = createServer(newServer) - } - return ts -} - -func createServer(newServer func(handler http.Handler) *httptest.Server) 
*httptest.Server { - return newServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.Method == http.MethodPost { - req, err := ioutil.ReadAll(r.Body) - defer r.Body.Close() - if err != nil { - log.Fatalln(err) - } - var m interface{} - err = json.Unmarshal(req, &m) - w.Header().Set("Content-Type", "application/json") - if err != nil { - w.WriteHeader(http.StatusBadRequest) - } else { - w.WriteHeader(http.StatusOK) - w.Write(req) - } - } else { - message := map[string]interface{}{ - "hello": "world", - "embedded": map[string]string{ - "hello": "world", - }, - "list": []map[string]interface{}{ - {"foo": "bar"}, - {"hello": "world"}, - }, - } - b, _ := json.Marshal(message) - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - w.Write(b) - } - })) -} - -func createCustomServer(newServer func(handler http.Handler) *httptest.Server) *httptest.Server { - var isRetry bool - return newServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - if !isRetry { - w.Header().Set("X-Rate-Limit-Limit", "0") - w.Header().Set("X-Rate-Limit-Remaining", "0") - w.Header().Set("X-Rate-Limit-Reset", strconv.FormatInt(time.Now().Unix(), 10)) - w.WriteHeader(http.StatusTooManyRequests) - w.Write([]byte{}) - isRetry = true - } else { - message := map[string]interface{}{ - "hello": "world", - "embedded": map[string]string{ - "hello": "world", - }, - } - b, _ := json.Marshal(message) - w.WriteHeader(http.StatusOK) - w.Write(b) - isRetry = false - } - })) -} - -func createCustomRetryServer(newServer func(handler http.Handler) *httptest.Server) *httptest.Server { - retryCount := 0 - statusCodes := []int{http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, http.StatusHTTPVersionNotSupported, http.StatusVariantAlsoNegotiates, http.StatusInsufficientStorage, http.StatusLoopDetected, http.StatusNotExtended, 
http.StatusNetworkAuthenticationRequired} - return newServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - // Test retry for two times - if retryCount < 2 { - rand.Seed(time.Now().Unix()) - code := statusCodes[rand.Intn(len(statusCodes))] - w.WriteHeader(code) - w.Write([]byte{}) - retryCount++ - } else { - message := map[string]interface{}{ - "hello": "world", - "embedded": map[string]string{ - "hello": "world", - }, - } - b, _ := json.Marshal(message) - w.WriteHeader(http.StatusOK) - w.Write(b) - retryCount = 0 - } - })) -} - -func createCustomServerWithArrayResponse(newServer func(handler http.Handler) *httptest.Server) *httptest.Server { - return newServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - message := map[string]interface{}{ - "hello": []map[string]interface{}{ - { - "foo": "bar", - "list": []map[string]interface{}{ - {"foo": "bar"}, - {"hello": "world"}, - }, - }, - { - "foo": "bar", - "list": []map[string]interface{}{ - {"foo": "bar"}, - }, - }, - { - "bar": "foo", - "list": []map[string]interface{}{}, - }, - {"bar": "foo"}, - }, - } - b, _ := json.Marshal(message) - w.WriteHeader(http.StatusOK) - w.Write(b) - })) -} - -func runTest(t *testing.T, ts *httptest.Server, m map[string]interface{}, run func(input *HttpjsonInput, out *stubOutleter, t *testing.T)) { - testSetup(t) - defer ts.Close() - m["url"] = ts.URL - cfg := common.MustNewConfigFrom(m) - // Simulate input.Context from Filebeat input runner. - inputCtx := newInputContext() - defer close(inputCtx.Done) - - // Stub outlet for receiving events generated by the input. 
- eventOutlet := newStubOutlet() - defer eventOutlet.Close() - - connector := channel.ConnectorFunc(func(_ *common.Config, _ beat.ClientConfig) (channel.Outleter, error) { - return eventOutlet, nil - }) - - in, err := NewInput(cfg, connector, inputCtx) - if err != nil { - t.Fatal(err) - } - input := in.(*HttpjsonInput) - defer input.Stop() - - run(input, eventOutlet, t) -} - -func newInputContext() input.Context { - return input.Context{ - Done: make(chan struct{}), - } -} - -type stubOutleter struct { - sync.Mutex - cond *sync.Cond - done bool - Events []beat.Event -} - -func newStubOutlet() *stubOutleter { - o := &stubOutleter{} - o.cond = sync.NewCond(o) - return o -} - -func (o *stubOutleter) waitForEvents(numEvents int) ([]beat.Event, bool) { - o.Lock() - defer o.Unlock() - - for len(o.Events) < numEvents && !o.done { - o.cond.Wait() - } - - size := numEvents - if size >= len(o.Events) { - size = len(o.Events) - } - - out := make([]beat.Event, size) - copy(out, o.Events) - return out, len(out) == numEvents -} - -func (o *stubOutleter) Close() error { - o.Lock() - defer o.Unlock() - o.done = true - return nil -} - -func (o *stubOutleter) Done() <-chan struct{} { return nil } - -func (o *stubOutleter) OnEvent(event beat.Event) bool { - o.Lock() - defer o.Unlock() - o.Events = append(o.Events, event) - o.cond.Broadcast() - return !o.done -} - -func newOAuth2TestServer(t *testing.T) *httptest.Server { - return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - defer r.Body.Close() - - if r.Method != "POST" { - t.Errorf("expected POST request, got %v", r.Method) - return - } - - if err := r.ParseForm(); err != nil { - t.Errorf("no error expected, got %q", err) - return - } - - if gt := r.FormValue("grant_type"); gt != "client_credentials" { - t.Errorf("expected grant_type was client_credentials, got %q", gt) - return - } - - clientID := r.FormValue("client_id") - clientSecret := r.FormValue("client_secret") - if clientID == "" || 
clientSecret == "" { - clientID, clientSecret, _ = r.BasicAuth() - } - if clientID != "a_client_id" || clientSecret != "a_client_secret" { - t.Errorf("expected client credentials \"a_client_id:a_client_secret\", got \"%s:%s\"", clientID, clientSecret) - } - - if s := r.FormValue("scope"); s != "scope1 scope2" { - t.Errorf("expected scope was scope1+scope2, got %q", s) - return - } - - expectedParams := []string{"v1", "v2"} - if p := r.Form["param1"]; !reflect.DeepEqual(expectedParams, p) { - t.Errorf("expected params were %q, but got %q", expectedParams, p) - return - } - - w.Header().Set("content-type", "application/json") - w.Write([]byte(`{"token_type":"Bearer","expires_in":"3599","access_token":"abcdef1234567890"}`)) - })) -} - -// --- Test Cases - func TestGetNextLinkFromHeader(t *testing.T) { header := make(http.Header) header.Add("Link", "; rel=\"self\"") @@ -346,16 +43,16 @@ func TestCreateRequestInfoFromBody(t *testing.T) { config, common.MapStr(m), common.MapStr(m), - &RequestInfo{ - URL: "", - ContentMap: common.MapStr{}, - Headers: common.MapStr{}, + &requestInfo{ + url: "", + contentMap: common.MapStr{}, + headers: common.MapStr{}, }, ) - if ri.URL != "https://test-123" { + if ri.url != "https://test-123" { t.Fatal("Failed to test createRequestInfoFromBody. URL should be https://test-123.") } - p, err := ri.ContentMap.GetValue("pagination_id") + p, err := ri.contentMap.GetValue("pagination_id") if err != nil { t.Fatal("Failed to test createRequestInfoFromBody with error", err) } @@ -367,7 +64,7 @@ func TestCreateRequestInfoFromBody(t *testing.T) { default: t.Fatalf("Failed to test createRequestInfoFromBody. 
pagination_id value %T should be int.", pt) } - b, err := ri.ContentMap.GetValue("extra_body") + b, err := ri.contentMap.GetValue("extra_body") if err != nil { t.Fatal("Failed to test createRequestInfoFromBody with error", err) } @@ -432,357 +129,3 @@ func TestGetRateLimitCase3(t *testing.T) { t.Fatal("Failed to test getRateLimit.") } } - -func TestGET(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "interval": 0, - } - ts := createTestServer(HTTPTestServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(1) - if !ok { - t.Fatalf("Expected 1 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestGetHTTPS(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "interval": 0, - "ssl.verification_mode": "none", - } - ts := createTestServer(HTTPTestServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(1) - if !ok { - t.Fatalf("Expected 1 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestRateLimitRetry(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "interval": 0, - } - ts := createTestServer(RateLimitRetryServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(1) - if !ok { - t.Fatalf("Expected 1 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestErrorRetry(t *testing.T) { - m := map[string]interface{}{ - "http_method": 
"GET", - "interval": 0, - } - ts := createTestServer(ErrorRetryServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(1) - if !ok { - t.Fatalf("Expected 1 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestArrayResponse(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "json_objects_array": "hello", - "interval": 0, - } - ts := createTestServer(ArrayResponseServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(2) - if !ok { - t.Fatalf("Expected 2 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestPOST(t *testing.T) { - m := map[string]interface{}{ - "http_method": "POST", - "http_request_body": map[string]interface{}{"test": "abc", "testNested": map[string]interface{}{"testNested1": 123}}, - "interval": 0, - } - ts := createTestServer(HTTPTestServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(1) - if !ok { - t.Fatalf("Expected 1 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestRepeatedPOST(t *testing.T) { - m := map[string]interface{}{ - "http_method": "POST", - "http_request_body": map[string]interface{}{"test": "abc", "testNested": map[string]interface{}{"testNested1": 123}}, - "interval": 10 ^ 9, - } - ts := createTestServer(HTTPTestServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := 
errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(3) - if !ok { - t.Fatalf("Expected 3 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestRunStop(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "interval": 0, - } - ts := createTestServer(HTTPTestServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - input.Run() - input.Stop() - input.Run() - input.Stop() - }) -} - -func TestOAuth2(t *testing.T) { - oAuth2Server := newOAuth2TestServer(t) - defer oAuth2Server.Close() - ts := createTestServer(HTTPTestServer) - defer ts.Close() - m := map[string]interface{}{ - "http_method": "GET", - "oauth2.client.id": "a_client_id", - "oauth2.client.secret": "a_client_secret", - "oauth2.token_url": oAuth2Server.URL, - "oauth2.endpoint_params": map[string][]string{ - "param1": {"v1", "v2"}, - }, - "oauth2.scopes": []string{"scope1", "scope2"}, - "interval": 0, - } - - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(1) - if !ok { - t.Fatalf("Expected 1 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestSplitResponseWithKey(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "split_events_by": "list", - "interval": 0, - } - ts := createTestServer(HTTPTestServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(2) - if !ok { - t.Fatalf("Expected 2 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestSplitResponseWithoutKey(t 
*testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "split_events_by": "not_found", - "interval": 0, - } - ts := createTestServer(HTTPTestServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(1) - if !ok { - t.Fatalf("Expected 1 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - }) -} - -func TestArrayWithSplitResponse(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "json_objects_array": "hello", - "split_events_by": "list", - "interval": 0, - } - - expectedFields := []string{ - `{ - "foo": "bar", - "list": { - "foo": "bar" - } - }`, - `{ - "foo": "bar", - "list": { - "hello": "world" - } - }`, - `{ - "foo": "bar", - "list": { - "foo": "bar" - } - }`, - `{ - "bar": "foo", - "list": [] - }`, - `{"bar": "foo"}`, - } - - ts := createTestServer(ArrayResponseServer) - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(5) - if !ok { - t.Fatalf("Expected 5 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - - for i, e := range events { - message, _ := e.GetValue("message") - assert.JSONEq(t, expectedFields[i], message.(string)) - } - }) -} - -func TestCursor(t *testing.T) { - m := map[string]interface{}{ - "http_method": "GET", - "date_cursor.field": "@timestamp", - "date_cursor.url_field": "$filter", - "date_cursor.value_template": "alertCreationTime ge {{.}}", - "date_cursor.initial_interval": "10m", - "date_cursor.date_format": "2006-01-02T15:04:05Z", - } - - timeNow = func() time.Time { - t, _ := time.Parse("2006-01-02T15:04:05Z", "2002-10-02T15:10:00Z") - return t - } - - const ( - expectedQuery = 
"%24filter=alertCreationTime+ge+2002-10-02T15%3A00%3A00Z" - expectedNextCursorValue = "2002-10-02T15:00:01Z" - expectedNextQuery = "%24filter=alertCreationTime+ge+2002-10-02T15%3A00%3A01Z" - ) - var gotQuery string - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - gotQuery = r.URL.Query().Encode() - w.Write([]byte(`[{"@timestamp":"2002-10-02T15:00:00Z"},{"@timestamp":"2002-10-02T15:00:01Z"}]`)) - })) - - runTest(t, ts, m, func(input *HttpjsonInput, out *stubOutleter, t *testing.T) { - group, _ := errgroup.WithContext(context.Background()) - group.Go(input.run) - - events, ok := out.waitForEvents(2) - if !ok { - t.Fatalf("Expected 2 events, but got %d.", len(events)) - } - input.Stop() - - if err := group.Wait(); err != nil { - t.Fatal(err) - } - - assert.Equal(t, expectedQuery, gotQuery) - assert.Equal(t, expectedNextCursorValue, input.nextCursorValue) - assert.Equal(t, fmt.Sprintf("%s?%s", ts.URL, expectedNextQuery), input.getURL()) - }) -} diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index 50677876b1f..333547ccc25 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -5,596 +5,151 @@ package httpjson import ( - "bytes" - "context" - "encoding/json" "fmt" - "io" - "io/ioutil" "net" "net/http" "net/url" - "regexp" - "strconv" - "sync" "time" - "github.com/pkg/errors" - - "github.com/elastic/beats/v7/filebeat/channel" - "github.com/elastic/beats/v7/filebeat/input" + input "github.com/elastic/beats/v7/filebeat/input/v2" + cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/common/transport/tlscommon" "github.com/elastic/beats/v7/libbeat/common/useragent" + "github.com/elastic/beats/v7/libbeat/feature" 
"github.com/elastic/beats/v7/libbeat/logp" - + "github.com/elastic/go-concert/ctxtool" "github.com/hashicorp/go-retryablehttp" - "go.uber.org/zap" ) const ( inputName = "httpjson" ) -var userAgent = useragent.UserAgent("Filebeat") +var ( + userAgent = useragent.UserAgent("Filebeat") -// for testing -var timeNow = time.Now + // for testing + timeNow = time.Now +) -func init() { - err := input.Register(inputName, NewInput) - if err != nil { - panic(errors.Wrapf(err, "failed to register %v input", inputName)) +// httpJSONInput struct has the HttpJsonInput configuration and other useful info. +type httpJSONInput struct{} + +// Plugin creates a stateful input Plugin collecting logs from HTTPJSONInput. +func Plugin(log *logp.Logger, store cursor.StateStore) input.Plugin { + return input.Plugin{ + Name: inputName, + Stability: feature.Beta, + Deprecated: false, + Info: "HTTP JSON Input", + Manager: &cursor.InputManager{ + Logger: log.Named(inputName), + StateStore: store, + Type: inputName, + Configure: configure, + }, } } -// HttpjsonInput struct has the HttpJsonInput configuration and other userful info. -type HttpjsonInput struct { - config - log *logp.Logger - outlet channel.Outleter // Output of received messages. - inputCtx context.Context // Wraps the Done channel from parent input.Context. - - workerCtx context.Context // Worker goroutine context. It's cancelled when the input stops or the worker exits. - workerCancel context.CancelFunc // Used to signal that the worker should stop. - workerOnce sync.Once // Guarantees that the worker goroutine is only started once. - workerWg sync.WaitGroup // Waits on worker goroutine.
- - nextCursorValue string -} - -// RequestInfo struct has the information for generating an HTTP request -type RequestInfo struct { - URL string - ContentMap common.MapStr - Headers common.MapStr -} - -type retryLogger struct { - log *logp.Logger -} - -func newRetryLogger() *retryLogger { - return &retryLogger{ - log: logp.NewLogger("httpjson.retryablehttp", zap.AddCallerSkip(1)), +func configure(cfg *common.Config) ([]cursor.Source, cursor.Input, error) { + config := defaultConfig() + if err := cfg.Unpack(&config); err != nil { + return nil, nil, err } -} - -func (l *retryLogger) Printf(s string, args ...interface{}) { - l.log.Debugf(s, args...) -} -// NewInput creates a new httpjson input -func NewInput( - cfg *common.Config, - connector channel.Connector, - inputContext input.Context, -) (input.Input, error) { - // Extract and validate the input's configuration. - conf := defaultConfig() - if err := cfg.Unpack(&conf); err != nil { - return nil, err - } - // Build outlet for events. - out, err := connector.Connect(cfg) + httpClient, err := newHTTPClient(config) if err != nil { - return nil, err - } - - // Wrap input.Context's Done channel with a context.Context. This goroutine - // stops with the parent closes the Done channel. - inputCtx, cancelInputCtx := context.WithCancel(context.Background()) - go func() { - defer cancelInputCtx() - select { - case <-inputContext.Done: - case <-inputCtx.Done(): - } - }() - - // If the input ever needs to be made restartable, then context would need - // to be recreated with each restart. - workerCtx, workerCancel := context.WithCancel(inputCtx) - - in := &HttpjsonInput{ - config: conf, - log: logp.NewLogger("httpjson").With( - "url", conf.URL), - outlet: out, - inputCtx: inputCtx, - workerCtx: workerCtx, - workerCancel: workerCancel, - } - - in.log.Info("Initialized httpjson input.") - return in, nil -} - -// Run starts the input worker then returns. Only the first invocation -// will ever start the worker. 
-func (in *HttpjsonInput) Run() { - in.workerOnce.Do(func() { - in.workerWg.Add(1) - go func() { - in.log.Info("httpjson input worker has started.") - defer in.log.Info("httpjson input worker has stopped.") - defer in.workerWg.Done() - defer in.workerCancel() - if err := in.run(); err != nil { - in.log.Error(err) - return - } - }() - }) -} - -// createHTTPRequest creates an HTTP/HTTPs request for the input -func (in *HttpjsonInput) createHTTPRequest(ctx context.Context, ri *RequestInfo) (*http.Request, error) { - var body io.Reader - if len(ri.ContentMap) == 0 || in.config.NoHTTPBody { - body = nil - } else { - b, err := json.Marshal(ri.ContentMap) - if err != nil { - return nil, err - } - body = bytes.NewReader(b) - } - req, err := http.NewRequest(in.config.HTTPMethod, ri.URL, body) - if err != nil { - return nil, err - } - req = req.WithContext(ctx) - req.Header.Set("Accept", "application/json") - req.Header.Set("Content-Type", "application/json") - req.Header.Set("User-Agent", userAgent) - if in.config.APIKey != "" { - if in.config.AuthenticationScheme != "" { - req.Header.Set("Authorization", in.config.AuthenticationScheme+" "+in.config.APIKey) - } else { - req.Header.Set("Authorization", in.config.APIKey) - } - } - for k, v := range ri.Headers { - switch vv := v.(type) { - case string: - req.Header.Set(k, vv) - default: - } - } - return req, nil -} - -// processEventArray publishes an event for each object contained in the array. It returns the last object in the array and an error if any. 
-func (in *HttpjsonInput) processEventArray(events []interface{}) (map[string]interface{}, error) { - var last map[string]interface{} - for _, t := range events { - switch v := t.(type) { - case map[string]interface{}: - for _, e := range in.splitEvent(v) { - last = e - d, err := json.Marshal(e) - if err != nil { - return nil, errors.Wrapf(err, "failed to marshal %+v", e) - } - ok := in.outlet.OnEvent(makeEvent(string(d))) - if !ok { - return nil, errors.New("function OnEvent returned false") - } - } - default: - return nil, errors.Errorf("expected only JSON objects in the array but got a %T", v) - } - } - return last, nil -} - -func (in *HttpjsonInput) splitEvent(event map[string]interface{}) []map[string]interface{} { - m := common.MapStr(event) - - hasSplitKey, _ := m.HasKey(in.config.SplitEventsBy) - if in.config.SplitEventsBy == "" || !hasSplitKey { - return []map[string]interface{}{event} - } - - splitOnIfc, _ := m.GetValue(in.config.SplitEventsBy) - splitOn, ok := splitOnIfc.([]interface{}) - // if not an array or is empty, we do nothing - if !ok || len(splitOn) == 0 { - return []map[string]interface{}{event} + return nil, nil, err } - var events []map[string]interface{} - for _, split := range splitOn { - s, ok := split.(map[string]interface{}) - // if not an object, we do nothing - if !ok { - return []map[string]interface{}{event} - } + r := &requester{config: config, client: httpClient} - mm := m.Clone() - _, err := mm.Put(in.config.SplitEventsBy, s) - if err != nil { - return []map[string]interface{}{event} - } - - events = append(events, mm) - } + in := &httpJSONInput{} - return events + return []cursor.Source{r}, in, nil } -// getNextLinkFromHeader retrieves the next URL for pagination from the HTTP Header of the response -func getNextLinkFromHeader(header http.Header, fieldName string, re *regexp.Regexp) (string, error) { - links, ok := header[fieldName] - if !ok { - return "", errors.Errorf("field %s does not exist in the HTTP Header", fieldName) - } 
- for _, link := range links { - matchArray := re.FindAllStringSubmatch(link, -1) - if len(matchArray) == 1 { - return matchArray[0][1], nil - } - } - return "", nil -} +func (*httpJSONInput) Name() string { return inputName } -// getRateLimit get the rate limit value if specified in the HTTP Header of the response, -// and returns an init64 value in seconds since unix epoch for rate limit reset time. -// When there is a remaining rate limit quota, or when the rate limit reset time has expired, it -// returns 0 for the epoch value. -func getRateLimit(header http.Header, rateLimit *RateLimit) (int64, error) { - if rateLimit != nil { - if rateLimit.Remaining != "" { - remaining := header.Get(rateLimit.Remaining) - if remaining == "" { - return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", rateLimit.Remaining) - } - m, err := strconv.ParseInt(remaining, 10, 64) - if err != nil { - return 0, errors.Wrapf(err, "failed to parse rate-limit remaining value") - } - if m == 0 { - reset := header.Get(rateLimit.Reset) - if reset == "" { - return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", rateLimit.Reset) - } - epoch, err := strconv.ParseInt(reset, 10, 64) - if err != nil { - return 0, errors.Wrapf(err, "failed to parse rate-limit reset value") - } - if time.Unix(epoch, 0).Sub(time.Now()) <= 0 { - return 0, nil - } - return epoch, nil - } - } - } - return 0, nil -} - -// applyRateLimit applies appropriate rate limit if specified in the HTTP Header of the response -func (in *HttpjsonInput) applyRateLimit(ctx context.Context, header http.Header, rateLimit *RateLimit) error { - epoch, err := getRateLimit(header, rateLimit) +func (*httpJSONInput) Test(source cursor.Source, ctx input.TestContext) error { + requester := source.(*requester) + url, err := url.Parse(requester.config.URL) if err != nil { return err } - t := time.Unix(epoch, 0) - w := time.Until(t) - if epoch == 0 || w <= 0 { - in.log.Debugf("Rate Limit: No 
need to apply rate limit.") - return nil - } - in.log.Debugf("Rate Limit: Wait until %v for the rate limit to reset.", t) - ticker := time.NewTicker(w) - defer ticker.Stop() - select { - case <-ctx.Done(): - in.log.Info("Context done.") - return nil - case <-ticker.C: - in.log.Debug("Rate Limit: time is up.") - return nil - } -} - -// createRequestInfoFromBody creates a new RequestInfo for a new HTTP request in pagination based on HTTP response body -func createRequestInfoFromBody(config *Pagination, response, last common.MapStr, ri *RequestInfo) (*RequestInfo, error) { - // we try to get it from last element, if not found, from the original response - v, err := last.GetValue(config.IDField) - if err == common.ErrKeyNotFound { - v, err = response.GetValue(config.IDField) - } - - if err == common.ErrKeyNotFound { - return nil, nil - } - - if err != nil { - return nil, errors.Wrapf(err, "failed to retrieve id_field for pagination") - } - - if config.RequestField != "" { - ri.ContentMap.Put(config.RequestField, v) - if config.URL != "" { - ri.URL = config.URL - } - } else if config.URLField != "" { - url, err := url.Parse(ri.URL) - if err == nil { - q := url.Query() - q.Set(config.URLField, fmt.Sprint(v)) - url.RawQuery = q.Encode() - ri.URL = url.String() - } - } else { - switch vt := v.(type) { - case string: - ri.URL = vt - default: - return nil, errors.New("pagination ID is not of string type") - } - } - if len(config.ExtraBodyContent) > 0 { - ri.ContentMap.Update(common.MapStr(config.ExtraBodyContent)) - } - return ri, nil -} - -// processHTTPRequest processes HTTP request, and handles pagination if enabled -func (in *HttpjsonInput) processHTTPRequest(ctx context.Context, client *http.Client, ri *RequestInfo) error { - ri.URL = in.getURL() - - var ( - m, v interface{} - response, mm map[string]interface{} - ) - for { - req, err := in.createHTTPRequest(ctx, ri) - if err != nil { - return errors.Wrapf(err, "failed to create http request") + port := func() string { 
+ if url.Port() != "" { + return url.Port() } - msg, err := client.Do(req) - if err != nil { - return errors.Wrapf(err, "failed to execute http client.Do") - } - responseData, err := ioutil.ReadAll(msg.Body) - header := msg.Header - msg.Body.Close() - if err != nil { - return errors.Wrapf(err, "failed to read http.response.body") - } - if msg.StatusCode != http.StatusOK { - in.log.Debugw("HTTP request failed", "http.response.status_code", msg.StatusCode, "http.response.body", string(responseData)) - if msg.StatusCode == http.StatusTooManyRequests { - if err = in.applyRateLimit(ctx, header, in.config.RateLimit); err != nil { - return err - } - continue - } - return errors.Errorf("http request was unsuccessful with a status code %d", msg.StatusCode) - } - - err = json.Unmarshal(responseData, &m) - if err != nil { - in.log.Debug("failed to unmarshal http.response.body", string(responseData)) - return errors.Wrapf(err, "failed to unmarshal http.response.body") - } - switch obj := m.(type) { - // Top level Array - case []interface{}: - mm, err = in.processEventArray(obj) - if err != nil { - return err - } - case map[string]interface{}: - response = obj - if in.config.JSONObjects == "" { - mm, err = in.processEventArray([]interface{}{obj}) - if err != nil { - return err - } - } else { - v, err = common.MapStr(obj).GetValue(in.config.JSONObjects) - if err != nil { - if err == common.ErrKeyNotFound { - break - } - return err - } - switch ts := v.(type) { - case []interface{}: - mm, err = in.processEventArray(ts) - if err != nil { - return err - } - default: - return errors.Errorf("content of %s is not a valid array", in.config.JSONObjects) - } - } - default: - in.log.Debug("http.response.body is not a valid JSON object", string(responseData)) - return errors.Errorf("http.response.body is not a valid JSON object, but a %T", obj) - } - - if mm != nil && in.config.Pagination.IsEnabled() { - if in.config.Pagination.Header != nil { - // Pagination control using HTTP Header - 
url, err := getNextLinkFromHeader(header, in.config.Pagination.Header.FieldName, in.config.Pagination.Header.RegexPattern) - if err != nil { - return errors.Wrapf(err, "failed to retrieve the next URL for pagination") - } - if ri.URL == url || url == "" { - in.log.Info("Pagination finished.") - break - } - ri.URL = url - if err = in.applyRateLimit(ctx, header, in.config.RateLimit); err != nil { - return err - } - in.log.Info("Continuing with pagination to URL: ", ri.URL) - continue - } else { - // Pagination control using HTTP Body fields - ri, err = createRequestInfoFromBody(in.config.Pagination, common.MapStr(response), common.MapStr(mm), ri) - if err != nil { - return err - } - if ri == nil { - break - } - if err = in.applyRateLimit(ctx, header, in.config.RateLimit); err != nil { - return err - } - in.log.Info("Continuing with pagination to URL: ", ri.URL) - continue - } + switch url.Scheme { + case "https": + return "443" } - break - } + return "80" + }() - if mm != nil && in.config.DateCursor.IsEnabled() { - in.advanceCursor(common.MapStr(mm)) + _, err = net.DialTimeout("tcp", fmt.Sprintf("%s:%s", url.Hostname(), port), time.Second) + if err != nil { + return fmt.Errorf("url %q is unreachable", requester.config.URL) } return nil } -func (in *HttpjsonInput) getURL() string { - if !in.config.DateCursor.IsEnabled() { - return in.config.URL - } - - var dateStr string - if in.nextCursorValue == "" { - t := timeNow().UTC().Add(-in.config.DateCursor.InitialInterval) - dateStr = t.Format(in.config.DateCursor.GetDateFormat()) - } else { - dateStr = in.nextCursorValue - } - - url, err := url.Parse(in.config.URL) - if err != nil { - return in.config.URL - } - - q := url.Query() - - var value string - if in.config.DateCursor.ValueTemplate == nil { - value = dateStr - } else { - buf := new(bytes.Buffer) - if err := in.config.DateCursor.ValueTemplate.Execute(buf, dateStr); err != nil { - return in.config.URL - } - value = buf.String() - } +// Run starts the input worker 
then returns. Only the first invocation +// will ever start the worker. +func (in *httpJSONInput) Run( + ctx input.Context, + source cursor.Source, + cursor cursor.Cursor, + publisher cursor.Publisher, +) error { + requester := source.(*requester) - q.Set(in.config.DateCursor.URLField, value) + log := ctx.Logger.With("url", requester.config.URL) + requester.log = log - url.RawQuery = q.Encode() + requester.loadCheckpoint(cursor) - return url.String() -} + stdCtx := ctxtool.FromCanceller(ctx.Cancelation) -func (in *HttpjsonInput) advanceCursor(m common.MapStr) { - if in.config.DateCursor.Field == "" { - in.nextCursorValue = time.Now().UTC().Format(in.config.DateCursor.GetDateFormat()) - return + ri := &requestInfo{ + contentMap: common.MapStr{}, + headers: requester.config.HTTPHeaders, } - v, err := m.GetValue(in.config.DateCursor.Field) - if err != nil { - in.log.Warnf("date_cursor field: %q", err) - return - } - switch t := v.(type) { - case string: - _, err := time.Parse(in.config.DateCursor.GetDateFormat(), t) - if err != nil { - in.log.Warn("date_cursor field does not have the expected layout") - return - } - in.nextCursorValue = t - default: - in.log.Warn("date_cursor field must be a string, cursor will not advance") - return + if requester.config.HTTPMethod == "POST" && + requester.config.HTTPRequestBody != nil { + ri.contentMap.Update(common.MapStr(requester.config.HTTPRequestBody)) } -} - -func (in *HttpjsonInput) run() error { - ctx, cancel := context.WithCancel(in.workerCtx) - defer cancel() - client, err := in.newHTTPClient(ctx) - if err != nil { - return err - } - - ri := &RequestInfo{ - ContentMap: common.MapStr{}, - Headers: in.HTTPHeaders, - } - if in.config.HTTPMethod == "POST" && in.config.HTTPRequestBody != nil { - ri.ContentMap.Update(common.MapStr(in.config.HTTPRequestBody)) - } - err = in.processHTTPRequest(ctx, client, ri) - if err == nil && in.Interval > 0 { - ticker := time.NewTicker(in.Interval) + err := requester.processHTTPRequest(stdCtx, 
publisher, ri) + if err == nil && requester.config.Interval > 0 { + ticker := time.NewTicker(requester.config.Interval) defer ticker.Stop() for { select { - case <-ctx.Done(): - in.log.Info("Context done.") + case <-stdCtx.Done(): + log.Info("Context done.") return nil case <-ticker.C: - in.log.Info("Process another repeated request.") - err = in.processHTTPRequest(ctx, client, ri) + log.Info("Process another repeated request.") + err = requester.processHTTPRequest(stdCtx, publisher, ri) if err != nil { return err } } } } - return err -} - -// Stop stops the misp input and waits for it to fully stop. -func (in *HttpjsonInput) Stop() { - in.workerCancel() - in.workerWg.Wait() -} -// Wait is an alias for Stop. -func (in *HttpjsonInput) Wait() { - in.Stop() + return err } -func (in *HttpjsonInput) newHTTPClient(ctx context.Context) (*http.Client, error) { - tlsConfig, err := tlscommon.LoadTLSConfig(in.config.TLS) +func newHTTPClient(config config) (*http.Client, error) { + tlsConfig, err := tlscommon.LoadTLSConfig(config.TLS) if err != nil { return nil, err } @@ -604,23 +159,23 @@ func (in *HttpjsonInput) newHTTPClient(ctx context.Context) (*http.Client, error HTTPClient: &http.Client{ Transport: &http.Transport{ DialContext: (&net.Dialer{ - Timeout: in.config.HTTPClientTimeout, + Timeout: config.HTTPClientTimeout, }).DialContext, TLSClientConfig: tlsConfig.ToConfig(), DisableKeepAlives: true, }, - Timeout: in.config.HTTPClientTimeout, + Timeout: config.HTTPClientTimeout, }, Logger: newRetryLogger(), - RetryWaitMin: in.config.RetryWaitMin, - RetryWaitMax: in.config.RetryWaitMax, - RetryMax: in.config.RetryMax, + RetryWaitMin: config.RetryWaitMin, + RetryWaitMax: config.RetryWaitMax, + RetryMax: config.RetryMax, CheckRetry: retryablehttp.DefaultRetryPolicy, Backoff: retryablehttp.DefaultBackoff, } - if in.config.OAuth2.IsEnabled() { - return in.config.OAuth2.Client(ctx, client.StandardClient()) + if config.OAuth2.IsEnabled() { + return 
config.OAuth2.Client(client.StandardClient()) } return client.StandardClient(), nil diff --git a/x-pack/filebeat/input/httpjson/requester.go b/x-pack/filebeat/input/httpjson/requester.go new file mode 100644 index 00000000000..a580296df91 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/requester.go @@ -0,0 +1,459 @@ +package httpjson + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "net/http" + "net/url" + "regexp" + "strconv" + "time" + + cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" + "github.com/pkg/errors" + "go.uber.org/zap" +) + +type requestInfo struct { + url string + contentMap common.MapStr + headers common.MapStr +} + +type retryLogger struct { + log *logp.Logger +} + +func newRetryLogger() *retryLogger { + return &retryLogger{ + log: logp.NewLogger("httpjson.retryablehttp", zap.AddCallerSkip(1)), + } +} + +func (l *retryLogger) Printf(s string, args ...interface{}) { + l.log.Debugf(s, args...) 
+} + +type requester struct { + log *logp.Logger + config config + client *http.Client + cursorValue string +} + +func (r requester) Name() string { return r.config.URL } + +// createHTTPRequest creates an HTTP/HTTPs request for the input +func (r *requester) createHTTPRequest(ctx context.Context, ri *requestInfo) (*http.Request, error) { + var body io.Reader + if len(ri.contentMap) == 0 || r.config.NoHTTPBody { + body = nil + } else { + b, err := json.Marshal(ri.contentMap) + if err != nil { + return nil, err + } + body = bytes.NewReader(b) + } + req, err := http.NewRequest(r.config.HTTPMethod, ri.url, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", userAgent) + if r.config.APIKey != "" { + if r.config.AuthenticationScheme != "" { + req.Header.Set("Authorization", r.config.AuthenticationScheme+" "+r.config.APIKey) + } else { + req.Header.Set("Authorization", r.config.APIKey) + } + } + for k, v := range ri.headers { + switch vv := v.(type) { + case string: + req.Header.Set(k, vv) + default: + } + } + return req, nil +} + +// processEventArray publishes an event for each object contained in the array. It returns the last object in the array and an error if any. 
+func (r *requester) processEventArray(publisher cursor.Publisher, events []interface{}) (map[string]interface{}, error) { + var last map[string]interface{} + for _, t := range events { + switch v := t.(type) { + case map[string]interface{}: + for _, e := range r.splitEvent(v) { + last = e + d, err := json.Marshal(e) + if err != nil { + return nil, errors.Wrapf(err, "failed to marshal %+v", e) + } + if err := publisher.Publish(makeEvent(string(d)), r.cursorValue); err != nil { + return nil, err + } + } + default: + return nil, errors.Errorf("expected only JSON objects in the array but got a %T", v) + } + } + return last, nil +} + +func (r *requester) splitEvent(event map[string]interface{}) []map[string]interface{} { + m := common.MapStr(event) + + hasSplitKey, _ := m.HasKey(r.config.SplitEventsBy) + if r.config.SplitEventsBy == "" || !hasSplitKey { + return []map[string]interface{}{event} + } + + splitOnIfc, _ := m.GetValue(r.config.SplitEventsBy) + splitOn, ok := splitOnIfc.([]interface{}) + // if not an array or is empty, we do nothing + if !ok || len(splitOn) == 0 { + return []map[string]interface{}{event} + } + + var events []map[string]interface{} + for _, split := range splitOn { + s, ok := split.(map[string]interface{}) + // if not an object, we do nothing + if !ok { + return []map[string]interface{}{event} + } + + mm := m.Clone() + _, err := mm.Put(r.config.SplitEventsBy, s) + if err != nil { + return []map[string]interface{}{event} + } + + events = append(events, mm) + } + + return events +} + +// getNextLinkFromHeader retrieves the next URL for pagination from the HTTP Header of the response +func getNextLinkFromHeader(header http.Header, fieldName string, re *regexp.Regexp) (string, error) { + links, ok := header[fieldName] + if !ok { + return "", errors.Errorf("field %s does not exist in the HTTP Header", fieldName) + } + for _, link := range links { + matchArray := re.FindAllStringSubmatch(link, -1) + if len(matchArray) == 1 { + return 
matchArray[0][1], nil + } + } + return "", nil +} + +// getRateLimit get the rate limit value if specified in the HTTP Header of the response, +// and returns an init64 value in seconds since unix epoch for rate limit reset time. +// When there is a remaining rate limit quota, or when the rate limit reset time has expired, it +// returns 0 for the epoch value. +func getRateLimit(header http.Header, rateLimit *RateLimit) (int64, error) { + if rateLimit != nil { + if rateLimit.Remaining != "" { + remaining := header.Get(rateLimit.Remaining) + if remaining == "" { + return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", rateLimit.Remaining) + } + m, err := strconv.ParseInt(remaining, 10, 64) + if err != nil { + return 0, errors.Wrapf(err, "failed to parse rate-limit remaining value") + } + if m == 0 { + reset := header.Get(rateLimit.Reset) + if reset == "" { + return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", rateLimit.Reset) + } + epoch, err := strconv.ParseInt(reset, 10, 64) + if err != nil { + return 0, errors.Wrapf(err, "failed to parse rate-limit reset value") + } + if time.Unix(epoch, 0).Sub(time.Now()) <= 0 { + return 0, nil + } + return epoch, nil + } + } + } + return 0, nil +} + +// applyRateLimit applies appropriate rate limit if specified in the HTTP Header of the response +func (r *requester) applyRateLimit(ctx context.Context, header http.Header, rateLimit *RateLimit) error { + epoch, err := getRateLimit(header, rateLimit) + if err != nil { + return err + } + t := time.Unix(epoch, 0) + w := time.Until(t) + if epoch == 0 || w <= 0 { + r.log.Debugf("Rate Limit: No need to apply rate limit.") + return nil + } + r.log.Debugf("Rate Limit: Wait until %v for the rate limit to reset.", t) + ticker := time.NewTicker(w) + defer ticker.Stop() + select { + case <-ctx.Done(): + r.log.Info("Context done.") + return nil + case <-ticker.C: + r.log.Debug("Rate Limit: time is up.") + return nil + } +} + +// 
createRequestInfoFromBody creates a new RequestInfo for a new HTTP request in pagination based on HTTP response body +func createRequestInfoFromBody(config *Pagination, response, last common.MapStr, ri *requestInfo) (*requestInfo, error) { + // we try to get it from last element, if not found, from the original response + v, err := last.GetValue(config.IDField) + if err == common.ErrKeyNotFound { + v, err = response.GetValue(config.IDField) + } + + if err == common.ErrKeyNotFound { + return nil, nil + } + + if err != nil { + return nil, errors.Wrapf(err, "failed to retrieve id_field for pagination") + } + + if config.RequestField != "" { + ri.contentMap.Put(config.RequestField, v) + if config.URL != "" { + ri.url = config.URL + } + } else if config.URLField != "" { + url, err := url.Parse(ri.url) + if err == nil { + q := url.Query() + q.Set(config.URLField, fmt.Sprint(v)) + url.RawQuery = q.Encode() + ri.url = url.String() + } + } else { + switch vt := v.(type) { + case string: + ri.url = vt + default: + return nil, errors.New("pagination ID is not of string type") + } + } + if len(config.ExtraBodyContent) > 0 { + ri.contentMap.Update(common.MapStr(config.ExtraBodyContent)) + } + return ri, nil +} + +// processHTTPRequest processes HTTP request, and handles pagination if enabled +func (r *requester) processHTTPRequest(ctx context.Context, publisher cursor.Publisher, ri *requestInfo) error { + ri.url = r.getURL() + fmt.Println(ri.url) + var ( + m, v interface{} + response, mm map[string]interface{} + ) + + for { + req, err := r.createHTTPRequest(ctx, ri) + if err != nil { + return errors.Wrapf(err, "failed to create http request") + } + msg, err := r.client.Do(req) + if err != nil { + return errors.Wrapf(err, "failed to execute http client.Do") + } + responseData, err := ioutil.ReadAll(msg.Body) + header := msg.Header + msg.Body.Close() + if err != nil { + return errors.Wrapf(err, "failed to read http.response.body") + } + if msg.StatusCode != http.StatusOK { + 
r.log.Debugw("HTTP request failed", "http.response.status_code", msg.StatusCode, "http.response.body", string(responseData)) + if msg.StatusCode == http.StatusTooManyRequests { + if err = r.applyRateLimit(ctx, header, r.config.RateLimit); err != nil { + return err + } + continue + } + return errors.Errorf("http request was unsuccessful with a status code %d", msg.StatusCode) + } + + err = json.Unmarshal(responseData, &m) + if err != nil { + r.log.Debug("failed to unmarshal http.response.body", string(responseData)) + return errors.Wrapf(err, "failed to unmarshal http.response.body") + } + switch obj := m.(type) { + // Top level Array + case []interface{}: + mm, err = r.processEventArray(publisher, obj) + if err != nil { + return err + } + case map[string]interface{}: + response = obj + if r.config.JSONObjects == "" { + mm, err = r.processEventArray(publisher, []interface{}{obj}) + if err != nil { + return err + } + } else { + v, err = common.MapStr(obj).GetValue(r.config.JSONObjects) + if err != nil { + if err == common.ErrKeyNotFound { + break + } + return err + } + switch ts := v.(type) { + case []interface{}: + mm, err = r.processEventArray(publisher, ts) + if err != nil { + return err + } + default: + return errors.Errorf("content of %s is not a valid array", r.config.JSONObjects) + } + } + default: + r.log.Debug("http.response.body is not a valid JSON object", string(responseData)) + return errors.Errorf("http.response.body is not a valid JSON object, but a %T", obj) + } + + if mm != nil && r.config.Pagination.IsEnabled() { + if r.config.Pagination.Header != nil { + // Pagination control using HTTP Header + url, err := getNextLinkFromHeader(header, r.config.Pagination.Header.FieldName, r.config.Pagination.Header.RegexPattern) + if err != nil { + return errors.Wrapf(err, "failed to retrieve the next URL for pagination") + } + if ri.url == url || url == "" { + r.log.Info("Pagination finished.") + break + } + ri.url = url + if err = r.applyRateLimit(ctx, header, 
r.config.RateLimit); err != nil { + return err + } + r.log.Info("Continuing with pagination to URL: ", ri.url) + continue + } else { + // Pagination control using HTTP Body fields + ri, err = createRequestInfoFromBody(r.config.Pagination, common.MapStr(response), common.MapStr(mm), ri) + if err != nil { + return err + } + if ri == nil { + break + } + if err = r.applyRateLimit(ctx, header, r.config.RateLimit); err != nil { + return err + } + r.log.Info("Continuing with pagination to URL: ", ri.url) + continue + } + } + break + } + + if mm != nil && r.config.DateCursor.IsEnabled() { + r.advanceCursor(common.MapStr(mm)) + } + + return nil +} + +func (r *requester) getURL() string { + if !r.config.DateCursor.IsEnabled() { + return r.config.URL + } + + var dateStr string + if r.cursorValue == "" { + t := timeNow().UTC().Add(-r.config.DateCursor.InitialInterval) + dateStr = t.Format(r.config.DateCursor.GetDateFormat()) + } else { + dateStr = r.cursorValue + } + + url, err := url.Parse(r.config.URL) + if err != nil { + return r.config.URL + } + + q := url.Query() + + var value string + if r.config.DateCursor.ValueTemplate == nil { + value = dateStr + } else { + buf := new(bytes.Buffer) + if err := r.config.DateCursor.ValueTemplate.Execute(buf, dateStr); err != nil { + return r.config.URL + } + value = buf.String() + } + + q.Set(r.config.DateCursor.URLField, value) + + url.RawQuery = q.Encode() + + return url.String() +} + +func (r *requester) advanceCursor(m common.MapStr) { + if r.config.DateCursor.Field == "" { + r.cursorValue = time.Now().UTC().Format(r.config.DateCursor.GetDateFormat()) + return + } + + v, err := m.GetValue(r.config.DateCursor.Field) + if err != nil { + r.log.Warnf("date_cursor field: %q", err) + return + } + switch t := v.(type) { + case string: + _, err := time.Parse(r.config.DateCursor.GetDateFormat(), t) + if err != nil { + r.log.Warn("date_cursor field does not have the expected layout") + return + } + r.cursorValue = t + default: + 
r.log.Warn("date_cursor field must be a string, cursor will not advance") + return + } +} + +func (r *requester) loadCheckpoint(cursor cursor.Cursor) { + var nextCursorValue string + if cursor.IsNew() { + return + } + + if err := cursor.Unpack(&nextCursorValue); err != nil { + r.log.Errorf("Reset cursor position. Failed to read checkpoint from registry: %v", err) + return + } + + r.cursorValue = nextCursorValue +} diff --git a/x-pack/filebeat/tests/system/test_httpjson.py b/x-pack/filebeat/tests/system/test_httpjson.py new file mode 100644 index 00000000000..edb1a34312b --- /dev/null +++ b/x-pack/filebeat/tests/system/test_httpjson.py @@ -0,0 +1,609 @@ +import json +import jinja2 +import os +import random +import sys + +from datetime import datetime +from flask import Flask, jsonify, request +from multiprocessing import Process + +sys.path.append(os.path.join(os.path.dirname(__file__), + '../../../../filebeat/tests/system')) + +from filebeat import BaseTest + + +class Test(BaseTest): + """ + Test filebeat with the httpjson input + """ + @classmethod + def setUpClass(self): + self.beat_name = "filebeat" + self.beat_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../../")) + + super(BaseTest, self).setUpClass() + + def setUp(self): + super(BaseTest, self).setUp() + + # Hack to make jinja2 have the right paths + self.template_env = jinja2.Environment( + loader=jinja2.FileSystemLoader([ + os.path.abspath(os.path.join( + self.beat_path, "../../filebeat")), + os.path.abspath(os.path.join(self.beat_path, "../../libbeat")) + ]) + ) + + def set_config(self, extra_options=[]): + """ + General function so that we do not have to define settings each time + """ + options = ["- type: httpjson", "enabled: true"] + options.extend(extra_options) + + self.render_config_template( + input_raw='\n '.join(options), + inputs=False, + ) + + def start_server(self, method, handler, ssl=False): + """ + Creates a new http test server that will respond with the given handler 
+ """ + app = Flask(__name__) + app.app_context().push() + + app.route('/', methods=[method])(handler) + + kwargs = {} + if ssl: + kwargs = {"ssl_context": "adhoc"} + + process = Process(target=app.run, kwargs=kwargs) + + def shutdown(): + app.do_teardown_appcontext() + process.terminate() + process.join() + + process.start() + + return shutdown + + def test_get(self): + """ + Test httpjson input performs a simple GET request correctly. + """ + + message = {"hello": "world"} + + def handler(): + return jsonify(message) + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 0", + "url: http://localhost:5000" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 1)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + + def test_get_https(self): + """ + Test httpjson input performs a simple GET request with HTTPS correctly. + """ + + message = {"hello": "world"} + + def handler(): + return jsonify(message) + + shutdown_func = self.start_server("GET", handler, ssl=True) + + options = [ + "http_method: GET", + "interval: 0", + "url: https://localhost:5000", + "ssl.verification_mode: none" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 1)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + + def test_rate_limit_retry(self): + """ + Test httpjson input performs a retry when is rate limited. 
+ """ + + message = {"hello": "world"} + + is_retry = False + + def handler(): + nonlocal is_retry + + resp = jsonify(message) + if is_retry: + return resp + + is_retry = True + resp.headers["X-Rate-Limit-Limit"] = "0" + resp.headers["X-Rate-Limit-Remaining"] = "0" + resp.headers["X-Rate-Limit-Reset"] = datetime.timestamp( + datetime.now()) + resp.status_code = 429 + + return resp + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 0", + "url: http://localhost:5000" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 1)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + + def test_error_retry(self): + """ + Test httpjson input performs a retry when the request fails. + """ + + message = {"hello": "world"} + + retry_count = 0 + + def handler(): + nonlocal retry_count + + resp = jsonify(message) + if retry_count == 2: + return resp + + retry_count += 1 + resp.status_code = random.randrange(500, 599) + + return resp + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 0", + "url: http://localhost:5000" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 1)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + + def test_array_response(self): + """ + Test httpjson input parses properly an array response. 
+ """ + + message = { + "hello": [ + { + "foo": "bar", + "list": [ + {"foo": "bar"}, + {"hello": "world"} + ] + }, + { + "foo": "bar", + "list": [ + {"foo": "bar"} + ] + } + ] + } + + def handler(): + return jsonify(message) + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 0", + "url: http://localhost:5000", + "json_objects_array: hello" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 2)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message["hello"][0] + assert json.loads(output[1]["message"]) == message["hello"][1] + + def test_post(self): + """ + Test httpjson input performs a simple POST request correctly. + """ + + message = {"hello": "world"} + + def handler(): + if request.get_json() != {"test":"abc"}: + resp = jsonify({"error":"got {}".format(request.get_data())}) + resp.status_code = 400 + return resp + return jsonify(message) + + shutdown_func = self.start_server("POST", handler) + + options = [ + "http_method: POST", + "interval: 0", + "url: http://localhost:5000", + "http_request_body:", + " test: abc" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 1)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + + def test_repeated_post(self): + """ + Test httpjson input performs several POST requests correctly. 
+ """ + + message = {"hello": "world"} + + def handler(): + if request.get_json() != {"test":"abc"}: + resp = jsonify({"error":"got {}".format(request.get_data())}) + resp.status_code = 400 + return resp + return jsonify(message) + + shutdown_func = self.start_server("POST", handler) + + options = [ + "http_method: POST", + "interval: 300ms", + "url: http://localhost:5000", + "http_request_body:", + " test: abc" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 3)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + assert json.loads(output[1]["message"]) == message + assert json.loads(output[2]["message"]) == message + + def test_oauth2(self): + """ + Test httpjson input performs oauth2 requests correctly. + """ + + message = {"hello": "world"} + is_oauth2_token_request = True + def handler(): + nonlocal is_oauth2_token_request + if is_oauth2_token_request: + if request.method != "POST": + resp = jsonify({"error":"expected POST request"}) + resp.status_code = 400 + return resp + if request.values["grant_type"] != "client_credentials": + resp = jsonify({"error":"expected grant_type was client_credentials"}) + resp.status_code = 400 + return resp + if request.values["client_id"] != "a_client_id" or request.values["client_secret"] != "a_client_secret": + resp = jsonify({"error":"expected client credentials a_client_id:a_client_secret"}) + resp.status_code = 400 + return resp + if request.values["scope"] != "scope1 scope2": + resp = jsonify({"error":"expected scope was scope1+scope2"}) + resp.status_code = 400 + return resp + if request.values["param1"] != "v1": + resp = jsonify({"error":"expected param1 was v1"}) + resp.status_code = 400 + return resp + is_oauth2_token_request = False + return jsonify({"token_type":"Bearer", "expires_in":"60", 
"access_token":"abcd"}) + return jsonify(message) + + shutdown_func = self.start_server("POST", handler) + + options = [ + "http_method: POST", + "interval: 0", + "url: http://localhost:5000", + "oauth2.client.id: a_client_id", + "oauth2.client.secret: a_client_secret", + "oauth2.token_url: http://localhost:5000", + "oauth2.endpoint_params:", + " param1: v1", + "oauth2.scopes: [scope1, scope2]" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 1)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + + def test_split_events_by(self): + """ + Test httpjson input splits events by key correctly. + """ + + message = { + "hello": "world", + "embedded": { + "hello": "world", + }, + "list": [ + {"foo": "bar"}, + {"hello": "world"} + ] + } + + def handler(): + return jsonify(message) + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 0", + "url: http://localhost:5000", + "split_events_by: list" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 2)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + expected1 = { + "hello": "world", + "embedded": { + "hello": "world", + }, + "list": {"foo": "bar"} + } + + expected2 = { + "hello": "world", + "embedded": { + "hello": "world", + }, + "list": {"hello": "world"} + } + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == expected1 + assert json.loads(output[1]["message"]) == expected2 + + def test_split_events_by_not_found(self): + """ + Test httpjson input does not fail when split key is not found + """ + + message = {"hello": "world"} + + def handler(): + return jsonify(message) + + shutdown_func = self.start_server("GET", 
handler) + + options = [ + "http_method: GET", + "interval: 0", + "url: http://localhost:5000", + "split_events_by: list" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 1)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message + + def test_split_events_by_with_array(self): + """ + Test httpjson input generate events when splitting from a key inside a list + """ + + message = { + "objs": [ + { + "foo": "bar", + "list": [ + {"bar": "baz"}, + {"one": "two"} + ] + }, + {"foo": "bar"} + ] + } + + def handler(): + return jsonify(message) + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 0", + "url: http://localhost:5000", + "json_objects_array: objs", + "split_events_by: list" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 3)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + expected1 = { + "foo": "bar", + "list": {"bar": "baz"} + } + + expected2 = { + "foo": "bar", + "list": {"one": "two"} + } + + expected3 = {"foo": "bar"} + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == expected1 + assert json.loads(output[1]["message"]) == expected2 + assert json.loads(output[2]["message"]) == expected3 + + def test_cursor(self): + """ + Test httpjson input works correctly with a date cursor + """ + + message = [ + {"@timestamp":"2002-10-02T15:00:00Z", "foo": "bar"}, + {"@timestamp":"2002-10-02T15:00:01Z", "foo": "bar"} + ] + + times = 0 + def handler(): + nonlocal times + if times == 1: + if request.values["$filter"] != "alertCreationTime ge 2002-10-02T15:00:01Z": + resp = jsonify({"error":"wrong filter"}) + resp.status_code = 400 + return resp + times += 1 + 
return jsonify(message) + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 300ms", + "url: http://localhost:5000", + "date_cursor.field: \"@timestamp\"", + "date_cursor.url_field: $filter", + "date_cursor.value_template: alertCreationTime ge {{.}}", + "date_cursor.initial_interval: 10m", + "date_cursor.date_format: 2006-01-02T15:04:05Z", + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 4)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == message[0] + assert json.loads(output[1]["message"]) == message[1] + assert json.loads(output[2]["message"]) == message[0] + assert json.loads(output[3]["message"]) == message[1] From 28e5b853efac2080e1cfbb6f87bfeffcce7b6e97 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Fri, 24 Jul 2020 11:26:10 +0200 Subject: [PATCH 02/15] Add CHANGELOG entry --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 5bc854185af..d7c7b17915d 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -532,6 +532,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Return error when log harvester tries to open a named pipe. {issue}18682[18682] {pull}20450[20450] - Avoid goroutine leaks in Filebeat readers. 
{issue}19193[19193] {pull}20455[20455] +- Use new cursor input for httpjson input {pull}20226[20226] *Heartbeat* From 9cb808bb4c08a5d019c10afd2fd21eef8857416e Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Fri, 24 Jul 2020 11:28:01 +0200 Subject: [PATCH 03/15] Fix format errors --- x-pack/filebeat/include/list.go | 1 - x-pack/filebeat/input/httpjson/input.go | 3 +- x-pack/filebeat/input/httpjson/requester.go | 9 ++++-- x-pack/filebeat/tests/system/test_httpjson.py | 28 ++++++++++--------- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/x-pack/filebeat/include/list.go b/x-pack/filebeat/include/list.go index 3cc9adb51d0..78262d08012 100644 --- a/x-pack/filebeat/include/list.go +++ b/x-pack/filebeat/include/list.go @@ -11,7 +11,6 @@ import ( _ "github.com/elastic/beats/v7/x-pack/filebeat/input/awscloudwatch" _ "github.com/elastic/beats/v7/x-pack/filebeat/input/azureeventhub" _ "github.com/elastic/beats/v7/x-pack/filebeat/input/googlepubsub" - _ "github.com/elastic/beats/v7/x-pack/filebeat/input/httpjson" _ "github.com/elastic/beats/v7/x-pack/filebeat/input/netflow" _ "github.com/elastic/beats/v7/x-pack/filebeat/input/s3" _ "github.com/elastic/beats/v7/x-pack/filebeat/module/activemq" diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index 333547ccc25..5a944387a3d 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -11,6 +11,8 @@ import ( "net/url" "time" + "github.com/hashicorp/go-retryablehttp" + input "github.com/elastic/beats/v7/filebeat/input/v2" cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" "github.com/elastic/beats/v7/libbeat/beat" @@ -20,7 +22,6 @@ import ( "github.com/elastic/beats/v7/libbeat/feature" "github.com/elastic/beats/v7/libbeat/logp" "github.com/elastic/go-concert/ctxtool" - "github.com/hashicorp/go-retryablehttp" ) const ( diff --git a/x-pack/filebeat/input/httpjson/requester.go 
b/x-pack/filebeat/input/httpjson/requester.go index a580296df91..8e22f2b31c7 100644 --- a/x-pack/filebeat/input/httpjson/requester.go +++ b/x-pack/filebeat/input/httpjson/requester.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package httpjson import ( @@ -13,11 +17,12 @@ import ( "strconv" "time" + "github.com/pkg/errors" + "go.uber.org/zap" + cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/logp" - "github.com/pkg/errors" - "go.uber.org/zap" ) type requestInfo struct { diff --git a/x-pack/filebeat/tests/system/test_httpjson.py b/x-pack/filebeat/tests/system/test_httpjson.py index edb1a34312b..96e41c7f782 100644 --- a/x-pack/filebeat/tests/system/test_httpjson.py +++ b/x-pack/filebeat/tests/system/test_httpjson.py @@ -282,8 +282,8 @@ def test_post(self): message = {"hello": "world"} def handler(): - if request.get_json() != {"test":"abc"}: - resp = jsonify({"error":"got {}".format(request.get_data())}) + if request.get_json() != {"test": "abc"}: + resp = jsonify({"error": "got {}".format(request.get_data())}) resp.status_code = 400 return resp return jsonify(message) @@ -319,8 +319,8 @@ def test_repeated_post(self): message = {"hello": "world"} def handler(): - if request.get_json() != {"test":"abc"}: - resp = jsonify({"error":"got {}".format(request.get_data())}) + if request.get_json() != {"test": "abc"}: + resp = jsonify({"error": "got {}".format(request.get_data())}) resp.status_code = 400 return resp return jsonify(message) @@ -357,31 +357,32 @@ def test_oauth2(self): message = {"hello": "world"} is_oauth2_token_request = True + def handler(): nonlocal is_oauth2_token_request if is_oauth2_token_request: if request.method != "POST": - resp = 
jsonify({"error":"expected POST request"}) + resp = jsonify({"error": "expected POST request"}) resp.status_code = 400 return resp if request.values["grant_type"] != "client_credentials": - resp = jsonify({"error":"expected grant_type was client_credentials"}) + resp = jsonify({"error": "expected grant_type was client_credentials"}) resp.status_code = 400 return resp if request.values["client_id"] != "a_client_id" or request.values["client_secret"] != "a_client_secret": - resp = jsonify({"error":"expected client credentials a_client_id:a_client_secret"}) + resp = jsonify({"error": "expected client credentials a_client_id:a_client_secret"}) resp.status_code = 400 return resp if request.values["scope"] != "scope1 scope2": - resp = jsonify({"error":"expected scope was scope1+scope2"}) + resp = jsonify({"error": "expected scope was scope1+scope2"}) resp.status_code = 400 return resp if request.values["param1"] != "v1": - resp = jsonify({"error":"expected param1 was v1"}) + resp = jsonify({"error": "expected param1 was v1"}) resp.status_code = 400 return resp is_oauth2_token_request = False - return jsonify({"token_type":"Bearer", "expires_in":"60", "access_token":"abcd"}) + return jsonify({"token_type": "Bearer", "expires_in": "60", "access_token": "abcd"}) return jsonify(message) shutdown_func = self.start_server("POST", handler) @@ -564,16 +565,17 @@ def test_cursor(self): """ message = [ - {"@timestamp":"2002-10-02T15:00:00Z", "foo": "bar"}, - {"@timestamp":"2002-10-02T15:00:01Z", "foo": "bar"} + {"@timestamp": "2002-10-02T15:00:00Z", "foo": "bar"}, + {"@timestamp": "2002-10-02T15:00:01Z", "foo": "bar"} ] times = 0 + def handler(): nonlocal times if times == 1: if request.values["$filter"] != "alertCreationTime ge 2002-10-02T15:00:01Z": - resp = jsonify({"error":"wrong filter"}) + resp = jsonify({"error": "wrong filter"}) resp.status_code = 400 return resp times += 1 From b21108f7a7d3224e7b9d43382a61501da48bc396 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Fri, 
31 Jul 2020 09:05:47 +0200 Subject: [PATCH 04/15] Convert to stateless input and refactor: - Paginator takes care of requesting next page info - Rate limiter takes care of rate limiting requests - Date cursor takes care of keeping track of cursor state --- CHANGELOG.next.asciidoc | 1 + .../filebeat/input/default-inputs/inputs.go | 2 +- .../filebeat/input/httpjson/config_oauth.go | 4 +- x-pack/filebeat/input/httpjson/date_cursor.go | 111 ++++ x-pack/filebeat/input/httpjson/input.go | 160 +++--- x-pack/filebeat/input/httpjson/pagination.go | 126 +++++ .../{httpjson_test.go => pagination_test.go} | 78 +-- .../filebeat/input/httpjson/rate_limiter.go | 131 +++++ .../input/httpjson/rate_limiter_test.go | 64 +++ x-pack/filebeat/input/httpjson/requester.go | 501 ++++++------------ x-pack/filebeat/tests/system/test_httpjson.py | 48 ++ 11 files changed, 739 insertions(+), 487 deletions(-) create mode 100644 x-pack/filebeat/input/httpjson/date_cursor.go create mode 100644 x-pack/filebeat/input/httpjson/pagination.go rename x-pack/filebeat/input/httpjson/{httpjson_test.go => pagination_test.go} (50%) create mode 100644 x-pack/filebeat/input/httpjson/rate_limiter.go create mode 100644 x-pack/filebeat/input/httpjson/rate_limiter_test.go diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index d7c7b17915d..b690d573ba3 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -533,6 +533,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Avoid goroutine leaks in Filebeat readers. 
{issue}19193[19193] {pull}20455[20455] - Use new cursor input for httpjson input {pull}20226[20226] +- Convert httpjson to v2 input {pull}20226[20226] *Heartbeat* diff --git a/x-pack/filebeat/input/default-inputs/inputs.go b/x-pack/filebeat/input/default-inputs/inputs.go index 5eade46eafa..1fe245b80f7 100644 --- a/x-pack/filebeat/input/default-inputs/inputs.go +++ b/x-pack/filebeat/input/default-inputs/inputs.go @@ -27,7 +27,7 @@ func xpackInputs(info beat.Info, log *logp.Logger, store beater.StateStore) []v2 return []v2.Plugin{ cloudfoundry.Plugin(), http_endpoint.Plugin(), + httpjson.Plugin(), o365audit.Plugin(log, store), - httpjson.Plugin(log, store), } } diff --git a/x-pack/filebeat/input/httpjson/config_oauth.go b/x-pack/filebeat/input/httpjson/config_oauth.go index 8f2daa06dc5..6a09cf2fb92 100644 --- a/x-pack/filebeat/input/httpjson/config_oauth.go +++ b/x-pack/filebeat/input/httpjson/config_oauth.go @@ -66,8 +66,8 @@ func (o *OAuth2) IsEnabled() bool { } // Client wraps the given http.Client and returns a new one that will use the oauth authentication. -func (o *OAuth2) Client(client *http.Client) (*http.Client, error) { - ctx := context.WithValue(context.Background(), oauth2.HTTPClient, client) +func (o *OAuth2) Client(ctx context.Context, client *http.Client) (*http.Client, error) { + ctx = context.WithValue(ctx, oauth2.HTTPClient, client) switch o.GetProvider() { case OAuth2ProviderAzure, OAuth2ProviderDefault: diff --git a/x-pack/filebeat/input/httpjson/date_cursor.go b/x-pack/filebeat/input/httpjson/date_cursor.go new file mode 100644 index 00000000000..1f18673d181 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/date_cursor.go @@ -0,0 +1,111 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package httpjson + +import ( + "bytes" + "net/url" + "time" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +type dateCursor struct { + log *logp.Logger + enabled bool + field string + url string + urlField string + initialInterval time.Duration + dateFormat string + + value string + valueTpl *Template +} + +func newDateCursorFromConfig(config config, log *logp.Logger) *dateCursor { + c := &dateCursor{ + enabled: config.DateCursor.IsEnabled(), + url: config.URL, + } + + if !c.enabled { + return c + } + + c.log = log + c.field = config.DateCursor.Field + c.url = config.URL + c.urlField = config.DateCursor.URLField + c.initialInterval = config.DateCursor.InitialInterval + c.dateFormat = config.DateCursor.GetDateFormat() + c.valueTpl = config.DateCursor.ValueTemplate + + return c +} + +func (c *dateCursor) getURL() string { + if !c.enabled { + return c.url + } + + var dateStr string + if c.value == "" { + t := timeNow().UTC().Add(-c.initialInterval) + dateStr = t.Format(c.dateFormat) + } else { + dateStr = c.value + } + + url, err := url.Parse(c.url) + if err != nil { + return c.url + } + + q := url.Query() + + var value string + if c.valueTpl == nil { + value = dateStr + } else { + buf := new(bytes.Buffer) + if err := c.valueTpl.Template.Execute(buf, dateStr); err != nil { + return c.url + } + value = buf.String() + } + + q.Set(c.urlField, value) + + url.RawQuery = q.Encode() + + return url.String() +} + +func (c *dateCursor) advance(m common.MapStr) { + if c.field == "" { + c.value = time.Now().UTC().Format(c.dateFormat) + return + } + + v, err := m.GetValue(c.field) + if err != nil { + c.log.Warnf("date_cursor field: %q", err) + return + } + switch t := v.(type) { + case string: + _, err := time.Parse(c.dateFormat, t) + if err != nil { + c.log.Warn("date_cursor field does not have the expected layout") + return + } + c.value = t + default: + c.log.Warn("date_cursor field must be a string, cursor will not 
advance") + return + } +} diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index 5a944387a3d..fa9343084cd 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -5,6 +5,7 @@ package httpjson import ( + "context" "fmt" "net" "net/http" @@ -12,9 +13,10 @@ import ( "time" "github.com/hashicorp/go-retryablehttp" + "go.uber.org/zap" - input "github.com/elastic/beats/v7/filebeat/input/v2" - cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + stateless "github.com/elastic/beats/v7/filebeat/input/v2/input-stateless" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/common/transport/tlscommon" @@ -35,48 +37,75 @@ var ( timeNow = time.Now ) -// httpJSONInput struct has the HttpJsonInput configuration and other userful info. -type httpJSONInput struct{} +type retryLogger struct { + log *logp.Logger +} + +func newRetryLogger() *retryLogger { + return &retryLogger{ + log: logp.NewLogger("httpjson.retryablehttp", zap.AddCallerSkip(1)), + } +} + +func (log *retryLogger) Error(format string, args ...interface{}) { + log.log.Errorf(format, args...) +} + +func (log *retryLogger) Info(format string, args ...interface{}) { + log.log.Infof(format, args...) +} + +func (log *retryLogger) Debug(format string, args ...interface{}) { + log.log.Debugf(format, args...) +} -// Plugin create a stateful input Plugin collecting logs from HTTPJSONInput. -func Plugin(log *logp.Logger, store cursor.StateStore) input.Plugin { - return input.Plugin{ +func (log *retryLogger) Warn(format string, args ...interface{}) { + log.log.Warnf(format, args...) 
+} + +type httpJSONInput struct { + config config + tlsConfig *tlscommon.TLSConfig +} + +func Plugin() v2.Plugin { + return v2.Plugin{ Name: inputName, Stability: feature.Beta, Deprecated: false, - Info: "HTTP JSON Input", - Manager: &cursor.InputManager{ - Logger: log.Named(inputName), - StateStore: store, - Type: inputName, - Configure: configure, - }, + Manager: stateless.NewInputManager(configure), } } -func configure(cfg *common.Config) ([]cursor.Source, cursor.Input, error) { - config := defaultConfig() - if err := cfg.Unpack(&config); err != nil { - return nil, nil, err +func configure(cfg *common.Config) (stateless.Input, error) { + conf := defaultConfig() + if err := cfg.Unpack(&conf); err != nil { + return nil, err } - httpClient, err := newHTTPClient(config) - if err != nil { - return nil, nil, err - } + return newHTTPJSONInput(conf) +} - r := &requester{config: config, client: httpClient} +func newHTTPJSONInput(config config) (*httpJSONInput, error) { + if err := config.Validate(); err != nil { + return nil, err + } - in := &httpJSONInput{} + tlsConfig, err := tlscommon.LoadTLSConfig(config.TLS) + if err != nil { + return nil, err + } - return []cursor.Source{r}, in, nil + return &httpJSONInput{ + config: config, + tlsConfig: tlsConfig, + }, nil } func (*httpJSONInput) Name() string { return inputName } -func (*httpJSONInput) Test(source cursor.Source, ctx input.TestContext) error { - requester := source.(*requester) - url, err := url.Parse(requester.config.URL) +func (in *httpJSONInput) Test(v2.TestContext) error { + url, err := url.Parse(in.config.URL) if err != nil { return err } @@ -92,9 +121,9 @@ func (*httpJSONInput) Test(source cursor.Source, ctx input.TestContext) error { return "80" }() - _, err = net.DialTimeout("tcp", fmt.Sprintf("%s:%s", url.Hostname(), port), time.Second) + _, err = net.DialTimeout("tcp", net.JoinHostPort(url.Hostname(), port), time.Second) if err != nil { - return fmt.Errorf("url %q is unreachable", requester.config.URL) + 
return fmt.Errorf("url %q is unreachable", in.config.URL) } return nil @@ -102,34 +131,34 @@ func (*httpJSONInput) Test(source cursor.Source, ctx input.TestContext) error { // Run starts the input worker then returns. Only the first invocation // will ever start the worker. -func (in *httpJSONInput) Run( - ctx input.Context, - source cursor.Source, - cursor cursor.Cursor, - publisher cursor.Publisher, -) error { - requester := source.(*requester) - - log := ctx.Logger.With("url", requester.config.URL) - requester.log = log - - requester.loadCheckpoint(cursor) +func (in *httpJSONInput) Run(ctx v2.Context, publisher stateless.Publisher) error { + log := ctx.Logger.With("url", in.config.URL) stdCtx := ctxtool.FromCanceller(ctx.Cancelation) - ri := &requestInfo{ - contentMap: common.MapStr{}, - headers: requester.config.HTTPHeaders, + httpClient, err := in.newHTTPClient(stdCtx) + if err != nil { + return err } - if requester.config.HTTPMethod == "POST" && - requester.config.HTTPRequestBody != nil { - ri.contentMap.Update(common.MapStr(requester.config.HTTPRequestBody)) - } + dateCursor := newDateCursorFromConfig(in.config, log) + + rateLimiter := newRateLimiterFromConfig(in.config, log) + + pagination := newPaginationFromConfig(in.config) - err := requester.processHTTPRequest(stdCtx, publisher, ri) - if err == nil && requester.config.Interval > 0 { - ticker := time.NewTicker(requester.config.Interval) + requester := newRequester( + in.config, + rateLimiter, + dateCursor, + pagination, + httpClient, + log, + ) + + err = requester.processHTTPRequest(stdCtx, publisher) + if err == nil && in.config.Interval > 0 { + ticker := time.NewTicker(in.config.Interval) defer ticker.Stop() for { select { @@ -138,7 +167,7 @@ func (in *httpJSONInput) Run( return nil case <-ticker.C: log.Info("Process another repeated request.") - err = requester.processHTTPRequest(stdCtx, publisher, ri) + err = requester.processHTTPRequest(stdCtx, publisher) if err != nil { return err } @@ -149,34 
+178,29 @@ func (in *httpJSONInput) Run( return err } -func newHTTPClient(config config) (*http.Client, error) { - tlsConfig, err := tlscommon.LoadTLSConfig(config.TLS) - if err != nil { - return nil, err - } - +func (in *httpJSONInput) newHTTPClient(ctx context.Context) (*http.Client, error) { // Make retryable HTTP client - var client *retryablehttp.Client = &retryablehttp.Client{ + client := &retryablehttp.Client{ HTTPClient: &http.Client{ Transport: &http.Transport{ DialContext: (&net.Dialer{ - Timeout: config.HTTPClientTimeout, + Timeout: in.config.HTTPClientTimeout, }).DialContext, - TLSClientConfig: tlsConfig.ToConfig(), + TLSClientConfig: in.tlsConfig.ToConfig(), DisableKeepAlives: true, }, - Timeout: config.HTTPClientTimeout, + Timeout: in.config.HTTPClientTimeout, }, Logger: newRetryLogger(), - RetryWaitMin: config.RetryWaitMin, - RetryWaitMax: config.RetryWaitMax, - RetryMax: config.RetryMax, + RetryWaitMin: in.config.RetryWaitMin, + RetryWaitMax: in.config.RetryWaitMax, + RetryMax: in.config.RetryMax, CheckRetry: retryablehttp.DefaultRetryPolicy, Backoff: retryablehttp.DefaultBackoff, } - if config.OAuth2.IsEnabled() { - return config.OAuth2.Client(client.StandardClient()) + if in.config.OAuth2.IsEnabled() { + return in.config.OAuth2.Client(ctx, client.StandardClient()) } return client.StandardClient(), nil diff --git a/x-pack/filebeat/input/httpjson/pagination.go b/x-pack/filebeat/input/httpjson/pagination.go new file mode 100644 index 00000000000..e334761c782 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/pagination.go @@ -0,0 +1,126 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package httpjson + +import ( + "fmt" + "net/http" + "net/url" + "regexp" + + "github.com/pkg/errors" + + "github.com/elastic/beats/v7/libbeat/common" +) + +type pagination struct { + extraBodyContent common.MapStr + header *Header + idField string + requestField string + urlField string + url string +} + +func newPaginationFromConfig(config config) *pagination { + if !config.Pagination.IsEnabled() { + return nil + } + return &pagination{ + extraBodyContent: config.Pagination.ExtraBodyContent.Clone(), + header: config.Pagination.Header, + idField: config.Pagination.IDField, + requestField: config.Pagination.RequestField, + urlField: config.Pagination.URLField, + url: config.Pagination.URL, + } +} + +func (p *pagination) nextRequestInfo(ri *requestInfo, response response, lastObj common.MapStr) (*requestInfo, bool, error) { + if p == nil { + return ri, false, nil + } + + if p.header == nil { + var err error + // Pagination control using HTTP Body fields + if err = p.setRequestInfoFromBody(response.body, lastObj, ri); err != nil { + return ri, false, err + } + + return ri, true, nil + } + + // Pagination control using HTTP Header + url, err := getNextLinkFromHeader(response.header, p.header.FieldName, p.header.RegexPattern) + if err != nil { + return ri, false, errors.Wrapf(err, "failed to retrieve the next URL for pagination") + } + if ri.url == url || url == "" { + return ri, false, nil + } + + ri.url = url + + return ri, true, nil +} + +// getNextLinkFromHeader retrieves the next URL for pagination from the HTTP Header of the response +func getNextLinkFromHeader(header http.Header, fieldName string, re *regexp.Regexp) (string, error) { + links, ok := header[fieldName] + if !ok { + return "", errors.Errorf("field %s does not exist in the HTTP Header", fieldName) + } + for _, link := range links { + matchArray := re.FindAllStringSubmatch(link, -1) + if len(matchArray) == 1 { + return matchArray[0][1], nil + } + } + return "", nil +} + +// createRequestInfoFromBody 
creates a new RequestInfo for a new HTTP request in pagination based on HTTP response body +func (p *pagination) setRequestInfoFromBody(response, last common.MapStr, ri *requestInfo) error { + // we try to get it from last element, if not found, from the original response + v, err := last.GetValue(p.idField) + if err == common.ErrKeyNotFound { + v, err = response.GetValue(p.idField) + } + + if err == common.ErrKeyNotFound { + return nil + } + + if err != nil { + return errors.Wrapf(err, "failed to retrieve id_field for pagination") + } + + if p.requestField != "" { + ri.contentMap.Put(p.requestField, v) + if p.url != "" { + ri.url = p.url + } + } else if p.urlField != "" { + url, err := url.Parse(ri.url) + if err == nil { + q := url.Query() + q.Set(p.urlField, fmt.Sprint(v)) + url.RawQuery = q.Encode() + ri.url = url.String() + } + } else { + switch vt := v.(type) { + case string: + ri.url = vt + default: + return errors.New("pagination ID is not of string type") + } + } + if len(p.extraBodyContent) > 0 { + ri.contentMap.Update(common.MapStr(p.extraBodyContent)) + } + return nil +} diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/pagination_test.go similarity index 50% rename from x-pack/filebeat/input/httpjson/httpjson_test.go rename to x-pack/filebeat/input/httpjson/pagination_test.go index e9f3af51dad..9b04de75819 100644 --- a/x-pack/filebeat/input/httpjson/httpjson_test.go +++ b/x-pack/filebeat/input/httpjson/pagination_test.go @@ -7,9 +7,7 @@ package httpjson import ( "net/http" "regexp" - "strconv" "testing" - "time" "github.com/elastic/beats/v7/libbeat/common" ) @@ -33,21 +31,21 @@ func TestCreateRequestInfoFromBody(t *testing.T) { "id": 100, } extraBodyContent := common.MapStr{"extra_body": "abc"} - config := &Pagination{ - IDField: "id", - RequestField: "pagination_id", - ExtraBodyContent: extraBodyContent, - URL: "https://test-123", + pagination := &pagination{ + idField: "id", + requestField: "pagination_id", 
+ extraBodyContent: extraBodyContent, + url: "https://test-123", } - ri, err := createRequestInfoFromBody( - config, + ri := &requestInfo{ + url: "", + contentMap: common.MapStr{}, + headers: common.MapStr{}, + } + err := pagination.setRequestInfoFromBody( common.MapStr(m), common.MapStr(m), - &requestInfo{ - url: "", - contentMap: common.MapStr{}, - headers: common.MapStr{}, - }, + ri, ) if ri.url != "https://test-123" { t.Fatal("Failed to test createRequestInfoFromBody. URL should be https://test-123.") @@ -77,55 +75,3 @@ func TestCreateRequestInfoFromBody(t *testing.T) { t.Fatalf("Failed to test createRequestInfoFromBody. extra_body type %T should be string.", bt) } } - -// Test getRateLimit function with a remaining quota, expect to receive 0, nil. -func TestGetRateLimitCase1(t *testing.T) { - header := make(http.Header) - header.Add("X-Rate-Limit-Limit", "120") - header.Add("X-Rate-Limit-Remaining", "118") - header.Add("X-Rate-Limit-Reset", "1581658643") - rateLimit := &RateLimit{ - Limit: "X-Rate-Limit-Limit", - Reset: "X-Rate-Limit-Reset", - Remaining: "X-Rate-Limit-Remaining", - } - epoch, err := getRateLimit(header, rateLimit) - if err != nil || epoch != 0 { - t.Fatal("Failed to test getRateLimit.") - } -} - -// Test getRateLimit function with a past time, expect to receive 0, nil. -func TestGetRateLimitCase2(t *testing.T) { - header := make(http.Header) - header.Add("X-Rate-Limit-Limit", "10") - header.Add("X-Rate-Limit-Remaining", "0") - header.Add("X-Rate-Limit-Reset", "1581658643") - rateLimit := &RateLimit{ - Limit: "X-Rate-Limit-Limit", - Reset: "X-Rate-Limit-Reset", - Remaining: "X-Rate-Limit-Remaining", - } - epoch, err := getRateLimit(header, rateLimit) - if err != nil || epoch != 0 { - t.Fatal("Failed to test getRateLimit.") - } -} - -// Test getRateLimit function with a time yet to come, expect to receive , nil. 
-func TestGetRateLimitCase3(t *testing.T) { - epoch := time.Now().Unix() + 100 - header := make(http.Header) - header.Add("X-Rate-Limit-Limit", "10") - header.Add("X-Rate-Limit-Remaining", "0") - header.Add("X-Rate-Limit-Reset", strconv.FormatInt(epoch, 10)) - rateLimit := &RateLimit{ - Limit: "X-Rate-Limit-Limit", - Reset: "X-Rate-Limit-Reset", - Remaining: "X-Rate-Limit-Remaining", - } - epoch2, err := getRateLimit(header, rateLimit) - if err != nil || epoch2 != epoch { - t.Fatal("Failed to test getRateLimit.") - } -} diff --git a/x-pack/filebeat/input/httpjson/rate_limiter.go b/x-pack/filebeat/input/httpjson/rate_limiter.go new file mode 100644 index 00000000000..95cc969e1d5 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/rate_limiter.go @@ -0,0 +1,131 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package httpjson + +import ( + "context" + "net/http" + "strconv" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/beats/v7/libbeat/logp" +) + +type rateLimiter struct { + log *logp.Logger + + limit string + reset string + remaining string +} + +func newRateLimiterFromConfig(config config, log *logp.Logger) *rateLimiter { + if config.RateLimit == nil { + return nil + } + + return &rateLimiter{ + log: log, + limit: config.RateLimit.Limit, + reset: config.RateLimit.Reset, + remaining: config.RateLimit.Remaining, + } +} + +func (r *rateLimiter) execute(ctx context.Context, f func(context.Context) (*http.Response, error)) (*http.Response, error) { + for { + resp, err := f(ctx) + if err != nil { + return nil, err + } + + header := resp.Header + if err != nil { + return nil, errors.Wrapf(err, "failed to read http.response.body") + } + + if r == nil || resp.StatusCode == http.StatusOK { + return resp, nil + } + + if resp.StatusCode != http.StatusTooManyRequests { + return nil, errors.Errorf("http request was unsuccessful with a status code %d", resp.StatusCode) + } + + if err := r.applyRateLimit(ctx, header); err != nil { + return nil, err + } + } +} + +// applyRateLimit applies appropriate rate limit if specified in the HTTP Header of the response +func (r *rateLimiter) applyRateLimit(ctx context.Context, header http.Header) error { + epoch, err := r.getRateLimit(header) + if err != nil { + return err + } + + t := time.Unix(epoch, 0) + w := time.Until(t) + if epoch == 0 || w <= 0 { + r.log.Debugf("Rate Limit: No need to apply rate limit.") + return nil + } + r.log.Debugf("Rate Limit: Wait until %v for the rate limit to reset.", t) + ticker := time.NewTicker(w) + defer ticker.Stop() + + select { + case <-ctx.Done(): + r.log.Info("Context done.") + return nil + case <-ticker.C: + r.log.Debug("Rate Limit: time is up.") + return nil + } +} + +// getRateLimit gets the rate limit value if specified in the HTTP Header of the response, +// and returns an int64 
value in seconds since unix epoch for rate limit reset time. +// When there is a remaining rate limit quota, or when the rate limit reset time has expired, it +// returns 0 for the epoch value. +func (r *rateLimiter) getRateLimit(header http.Header) (int64, error) { + if r == nil { + return 0, nil + } + + if r.remaining == "" { + return 0, nil + } + + remaining := header.Get(r.remaining) + if remaining == "" { + return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", r.remaining) + } + m, err := strconv.ParseInt(remaining, 10, 64) + if err != nil { + return 0, errors.Wrapf(err, "failed to parse rate-limit remaining value") + } + + if m != 0 { + return 0, nil + } + + reset := header.Get(r.reset) + if reset == "" { + return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", r.reset) + } + epoch, err := strconv.ParseInt(reset, 10, 64) + if err != nil { + return 0, errors.Wrapf(err, "failed to parse rate-limit reset value") + } + if time.Unix(epoch, 0).Sub(time.Now()) <= 0 { + return 0, nil + } + + return epoch, nil +} diff --git a/x-pack/filebeat/input/httpjson/rate_limiter_test.go b/x-pack/filebeat/input/httpjson/rate_limiter_test.go new file mode 100644 index 00000000000..e349e725f2f --- /dev/null +++ b/x-pack/filebeat/input/httpjson/rate_limiter_test.go @@ -0,0 +1,64 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package httpjson + +import ( + "net/http" + "strconv" + "testing" + "time" +) + +// Test getRateLimit function with a remaining quota, expect to receive 0, nil. 
+func TestGetRateLimitCase1(t *testing.T) { + header := make(http.Header) + header.Add("X-Rate-Limit-Limit", "120") + header.Add("X-Rate-Limit-Remaining", "118") + header.Add("X-Rate-Limit-Reset", "1581658643") + rateLimit := &rateLimiter{ + limit: "X-Rate-Limit-Limit", + reset: "X-Rate-Limit-Reset", + remaining: "X-Rate-Limit-Remaining", + } + epoch, err := rateLimit.getRateLimit(header) + if err != nil || epoch != 0 { + t.Fatal("Failed to test getRateLimit.") + } +} + +// Test getRateLimit function with a past time, expect to receive 0, nil. +func TestGetRateLimitCase2(t *testing.T) { + header := make(http.Header) + header.Add("X-Rate-Limit-Limit", "10") + header.Add("X-Rate-Limit-Remaining", "0") + header.Add("X-Rate-Limit-Reset", "1581658643") + rateLimit := &rateLimiter{ + limit: "X-Rate-Limit-Limit", + reset: "X-Rate-Limit-Reset", + remaining: "X-Rate-Limit-Remaining", + } + epoch, err := rateLimit.getRateLimit(header) + if err != nil || epoch != 0 { + t.Fatal("Failed to test getRateLimit.") + } +} + +// Test getRateLimit function with a time yet to come, expect to receive , nil. 
+func TestGetRateLimitCase3(t *testing.T) { + epoch := time.Now().Unix() + 100 + header := make(http.Header) + header.Add("X-Rate-Limit-Limit", "10") + header.Add("X-Rate-Limit-Remaining", "0") + header.Add("X-Rate-Limit-Reset", strconv.FormatInt(epoch, 10)) + rateLimit := &rateLimiter{ + limit: "X-Rate-Limit-Limit", + reset: "X-Rate-Limit-Reset", + remaining: "X-Rate-Limit-Remaining", + } + epoch2, err := rateLimit.getRateLimit(header) + if err != nil || epoch2 != epoch { + t.Fatal("Failed to test getRateLimit.") + } +} diff --git a/x-pack/filebeat/input/httpjson/requester.go b/x-pack/filebeat/input/httpjson/requester.go index 8e22f2b31c7..c5952f860cd 100644 --- a/x-pack/filebeat/input/httpjson/requester.go +++ b/x-pack/filebeat/input/httpjson/requester.go @@ -8,19 +8,13 @@ import ( "bytes" "context" "encoding/json" - "fmt" "io" "io/ioutil" "net/http" - "net/url" - "regexp" - "strconv" - "time" "github.com/pkg/errors" - "go.uber.org/zap" - cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + stateless "github.com/elastic/beats/v7/filebeat/input/v2/input-stateless" "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/logp" ) @@ -31,33 +25,157 @@ type requestInfo struct { headers common.MapStr } -type retryLogger struct { - log *logp.Logger +type requester struct { + log *logp.Logger + client *http.Client + dateCursor *dateCursor + rateLimiter *rateLimiter + pagination *pagination + + method string + reqBody common.MapStr + headers common.MapStr + noHTTPBody bool + apiKey string + authScheme string + jsonObjects string + splitEventsBy string } -func newRetryLogger() *retryLogger { - return &retryLogger{ - log: logp.NewLogger("httpjson.retryablehttp", zap.AddCallerSkip(1)), +func newRequester( + config config, + rateLimiter *rateLimiter, + dateCursor *dateCursor, + pagination *pagination, + client *http.Client, + log *logp.Logger) *requester { + return &requester{ + log: log, + client: client, + rateLimiter: rateLimiter, 
+ dateCursor: dateCursor, + pagination: pagination, + method: config.HTTPMethod, + reqBody: config.HTTPRequestBody.Clone(), + headers: config.HTTPHeaders.Clone(), + noHTTPBody: config.NoHTTPBody, + apiKey: config.APIKey, + authScheme: config.AuthenticationScheme, + splitEventsBy: config.SplitEventsBy, + jsonObjects: config.JSONObjects, } } -func (l *retryLogger) Printf(s string, args ...interface{}) { - l.log.Debugf(s, args...) +type response struct { + header http.Header + body common.MapStr } -type requester struct { - log *logp.Logger - config config - client *http.Client - cursorValue string -} +// processHTTPRequest processes HTTP request, and handles pagination if enabled +func (r *requester) processHTTPRequest(ctx context.Context, publisher stateless.Publisher) error { + ri := &requestInfo{ + url: r.dateCursor.getURL(), + contentMap: common.MapStr{}, + headers: r.headers, + } + + if r.method == "POST" && r.reqBody != nil { + ri.contentMap.Update(common.MapStr(r.reqBody)) + } + + var ( + m, v interface{} + response response + lastObj common.MapStr + ) + + // always request at least once + hasNext := true + + for hasNext { + resp, err := r.rateLimiter.execute( + ctx, + func(ctx context.Context) (*http.Response, error) { + req, err := r.createHTTPRequest(ctx, ri) + if err != nil { + return nil, errors.Wrapf(err, "failed to create http request") + } + msg, err := r.client.Do(req) + if err != nil { + return nil, errors.Wrapf(err, "failed to execute http client.Do") + } + return msg, nil + }, + ) + if err != nil { + return err + } + + responseData, err := ioutil.ReadAll(resp.Body) + if err != nil { + return errors.Wrapf(err, "failed to read http response") + } + _ = resp.Body.Close() + + if err = json.Unmarshal(responseData, &m); err != nil { + r.log.Debug("failed to unmarshal http.response.body", string(responseData)) + return errors.Wrapf(err, "failed to unmarshal http.response.body") + } + + switch obj := m.(type) { + // Top level Array + case []interface{}: + 
lastObj, err = r.processEventArray(publisher, obj) + if err != nil { + return err + } + case map[string]interface{}: + response.body = obj + if r.jsonObjects == "" { + lastObj, err = r.processEventArray(publisher, []interface{}{obj}) + if err != nil { + return err + } + } else { + v, err = common.MapStr(obj).GetValue(r.jsonObjects) + if err != nil { + if err == common.ErrKeyNotFound { + break + } + return err + } + switch ts := v.(type) { + case []interface{}: + lastObj, err = r.processEventArray(publisher, ts) + if err != nil { + return err + } + default: + return errors.Errorf("content of %s is not a valid array", r.jsonObjects) + } + } + default: + r.log.Debug("http.response.body is not a valid JSON object", string(responseData)) + return errors.Errorf("http.response.body is not a valid JSON object, but a %T", obj) + } + + ri, hasNext, err = r.pagination.nextRequestInfo(ri, response, lastObj) + if err != nil { + return err + } + } -func (r requester) Name() string { return r.config.URL } + if lastObj != nil && r.dateCursor.enabled { + r.dateCursor.advance(common.MapStr(lastObj)) + } + + return nil +} // createHTTPRequest creates an HTTP/HTTPs request for the input func (r *requester) createHTTPRequest(ctx context.Context, ri *requestInfo) (*http.Request, error) { var body io.Reader - if len(ri.contentMap) == 0 || r.config.NoHTTPBody { + if len(ri.contentMap) == 0 || r.noHTTPBody { body = nil } else { b, err := json.Marshal(ri.contentMap) @@ -66,7 +184,7 @@ func (r *requester) createHTTPRequest(ctx context.Context, ri *requestInfo) (*ht } body = bytes.NewReader(b) } - req, err := http.NewRequest(r.config.HTTPMethod, ri.url, body) + req, err := http.NewRequest(r.method, ri.url, body) if err != nil { return nil, err } @@ -74,11 +192,11 @@ func (r *requester) createHTTPRequest(ctx context.Context, ri *requestInfo) (*ht req.Header.Set("Accept", "application/json") req.Header.Set("Content-Type", "application/json") req.Header.Set("User-Agent", userAgent) - if 
r.config.APIKey != "" { - if r.config.AuthenticationScheme != "" { - req.Header.Set("Authorization", r.config.AuthenticationScheme+" "+r.config.APIKey) + if r.apiKey != "" { + if r.authScheme != "" { + req.Header.Set("Authorization", r.authScheme+" "+r.apiKey) } else { - req.Header.Set("Authorization", r.config.APIKey) + req.Header.Set("Authorization", r.apiKey) } } for k, v := range ri.headers { @@ -92,7 +210,7 @@ func (r *requester) createHTTPRequest(ctx context.Context, ri *requestInfo) (*ht } // processEventArray publishes an event for each object contained in the array. It returns the last object in the array and an error if any. -func (r *requester) processEventArray(publisher cursor.Publisher, events []interface{}) (map[string]interface{}, error) { +func (r *requester) processEventArray(publisher stateless.Publisher, events []interface{}) (map[string]interface{}, error) { var last map[string]interface{} for _, t := range events { switch v := t.(type) { @@ -103,9 +221,7 @@ func (r *requester) processEventArray(publisher cursor.Publisher, events []inter if err != nil { return nil, errors.Wrapf(err, "failed to marshal %+v", e) } - if err := publisher.Publish(makeEvent(string(d)), r.cursorValue); err != nil { - return nil, err - } + publisher.Publish(makeEvent(string(d))) } default: return nil, errors.Errorf("expected only JSON objects in the array but got a %T", v) @@ -117,12 +233,12 @@ func (r *requester) processEventArray(publisher cursor.Publisher, events []inter func (r *requester) splitEvent(event map[string]interface{}) []map[string]interface{} { m := common.MapStr(event) - hasSplitKey, _ := m.HasKey(r.config.SplitEventsBy) - if r.config.SplitEventsBy == "" || !hasSplitKey { + hasSplitKey, _ := m.HasKey(r.splitEventsBy) + if r.splitEventsBy == "" || !hasSplitKey { return []map[string]interface{}{event} } - splitOnIfc, _ := m.GetValue(r.config.SplitEventsBy) + splitOnIfc, _ := m.GetValue(r.splitEventsBy) splitOn, ok := splitOnIfc.([]interface{}) // if not 
an array or is empty, we do nothing if !ok || len(splitOn) == 0 { @@ -138,8 +254,7 @@ func (r *requester) splitEvent(event map[string]interface{}) []map[string]interf } mm := m.Clone() - _, err := mm.Put(r.config.SplitEventsBy, s) - if err != nil { + if _, err := mm.Put(r.splitEventsBy, s); err != nil { return []map[string]interface{}{event} } @@ -148,317 +263,3 @@ func (r *requester) splitEvent(event map[string]interface{}) []map[string]interf return events } - -// getNextLinkFromHeader retrieves the next URL for pagination from the HTTP Header of the response -func getNextLinkFromHeader(header http.Header, fieldName string, re *regexp.Regexp) (string, error) { - links, ok := header[fieldName] - if !ok { - return "", errors.Errorf("field %s does not exist in the HTTP Header", fieldName) - } - for _, link := range links { - matchArray := re.FindAllStringSubmatch(link, -1) - if len(matchArray) == 1 { - return matchArray[0][1], nil - } - } - return "", nil -} - -// getRateLimit get the rate limit value if specified in the HTTP Header of the response, -// and returns an init64 value in seconds since unix epoch for rate limit reset time. -// When there is a remaining rate limit quota, or when the rate limit reset time has expired, it -// returns 0 for the epoch value. 
-func getRateLimit(header http.Header, rateLimit *RateLimit) (int64, error) { - if rateLimit != nil { - if rateLimit.Remaining != "" { - remaining := header.Get(rateLimit.Remaining) - if remaining == "" { - return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", rateLimit.Remaining) - } - m, err := strconv.ParseInt(remaining, 10, 64) - if err != nil { - return 0, errors.Wrapf(err, "failed to parse rate-limit remaining value") - } - if m == 0 { - reset := header.Get(rateLimit.Reset) - if reset == "" { - return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", rateLimit.Reset) - } - epoch, err := strconv.ParseInt(reset, 10, 64) - if err != nil { - return 0, errors.Wrapf(err, "failed to parse rate-limit reset value") - } - if time.Unix(epoch, 0).Sub(time.Now()) <= 0 { - return 0, nil - } - return epoch, nil - } - } - } - return 0, nil -} - -// applyRateLimit applies appropriate rate limit if specified in the HTTP Header of the response -func (r *requester) applyRateLimit(ctx context.Context, header http.Header, rateLimit *RateLimit) error { - epoch, err := getRateLimit(header, rateLimit) - if err != nil { - return err - } - t := time.Unix(epoch, 0) - w := time.Until(t) - if epoch == 0 || w <= 0 { - r.log.Debugf("Rate Limit: No need to apply rate limit.") - return nil - } - r.log.Debugf("Rate Limit: Wait until %v for the rate limit to reset.", t) - ticker := time.NewTicker(w) - defer ticker.Stop() - select { - case <-ctx.Done(): - r.log.Info("Context done.") - return nil - case <-ticker.C: - r.log.Debug("Rate Limit: time is up.") - return nil - } -} - -// createRequestInfoFromBody creates a new RequestInfo for a new HTTP request in pagination based on HTTP response body -func createRequestInfoFromBody(config *Pagination, response, last common.MapStr, ri *requestInfo) (*requestInfo, error) { - // we try to get it from last element, if not found, from the original response - v, err := last.GetValue(config.IDField) - if 
err == common.ErrKeyNotFound { - v, err = response.GetValue(config.IDField) - } - - if err == common.ErrKeyNotFound { - return nil, nil - } - - if err != nil { - return nil, errors.Wrapf(err, "failed to retrieve id_field for pagination") - } - - if config.RequestField != "" { - ri.contentMap.Put(config.RequestField, v) - if config.URL != "" { - ri.url = config.URL - } - } else if config.URLField != "" { - url, err := url.Parse(ri.url) - if err == nil { - q := url.Query() - q.Set(config.URLField, fmt.Sprint(v)) - url.RawQuery = q.Encode() - ri.url = url.String() - } - } else { - switch vt := v.(type) { - case string: - ri.url = vt - default: - return nil, errors.New("pagination ID is not of string type") - } - } - if len(config.ExtraBodyContent) > 0 { - ri.contentMap.Update(common.MapStr(config.ExtraBodyContent)) - } - return ri, nil -} - -// processHTTPRequest processes HTTP request, and handles pagination if enabled -func (r *requester) processHTTPRequest(ctx context.Context, publisher cursor.Publisher, ri *requestInfo) error { - ri.url = r.getURL() - fmt.Println(ri.url) - var ( - m, v interface{} - response, mm map[string]interface{} - ) - - for { - req, err := r.createHTTPRequest(ctx, ri) - if err != nil { - return errors.Wrapf(err, "failed to create http request") - } - msg, err := r.client.Do(req) - if err != nil { - return errors.Wrapf(err, "failed to execute http client.Do") - } - responseData, err := ioutil.ReadAll(msg.Body) - header := msg.Header - msg.Body.Close() - if err != nil { - return errors.Wrapf(err, "failed to read http.response.body") - } - if msg.StatusCode != http.StatusOK { - r.log.Debugw("HTTP request failed", "http.response.status_code", msg.StatusCode, "http.response.body", string(responseData)) - if msg.StatusCode == http.StatusTooManyRequests { - if err = r.applyRateLimit(ctx, header, r.config.RateLimit); err != nil { - return err - } - continue - } - return errors.Errorf("http request was unsuccessful with a status code %d", 
msg.StatusCode) - } - - err = json.Unmarshal(responseData, &m) - if err != nil { - r.log.Debug("failed to unmarshal http.response.body", string(responseData)) - return errors.Wrapf(err, "failed to unmarshal http.response.body") - } - switch obj := m.(type) { - // Top level Array - case []interface{}: - mm, err = r.processEventArray(publisher, obj) - if err != nil { - return err - } - case map[string]interface{}: - response = obj - if r.config.JSONObjects == "" { - mm, err = r.processEventArray(publisher, []interface{}{obj}) - if err != nil { - return err - } - } else { - v, err = common.MapStr(obj).GetValue(r.config.JSONObjects) - if err != nil { - if err == common.ErrKeyNotFound { - break - } - return err - } - switch ts := v.(type) { - case []interface{}: - mm, err = r.processEventArray(publisher, ts) - if err != nil { - return err - } - default: - return errors.Errorf("content of %s is not a valid array", r.config.JSONObjects) - } - } - default: - r.log.Debug("http.response.body is not a valid JSON object", string(responseData)) - return errors.Errorf("http.response.body is not a valid JSON object, but a %T", obj) - } - - if mm != nil && r.config.Pagination.IsEnabled() { - if r.config.Pagination.Header != nil { - // Pagination control using HTTP Header - url, err := getNextLinkFromHeader(header, r.config.Pagination.Header.FieldName, r.config.Pagination.Header.RegexPattern) - if err != nil { - return errors.Wrapf(err, "failed to retrieve the next URL for pagination") - } - if ri.url == url || url == "" { - r.log.Info("Pagination finished.") - break - } - ri.url = url - if err = r.applyRateLimit(ctx, header, r.config.RateLimit); err != nil { - return err - } - r.log.Info("Continuing with pagination to URL: ", ri.url) - continue - } else { - // Pagination control using HTTP Body fields - ri, err = createRequestInfoFromBody(r.config.Pagination, common.MapStr(response), common.MapStr(mm), ri) - if err != nil { - return err - } - if ri == nil { - break - } - if err = 
r.applyRateLimit(ctx, header, r.config.RateLimit); err != nil { - return err - } - r.log.Info("Continuing with pagination to URL: ", ri.url) - continue - } - } - break - } - - if mm != nil && r.config.DateCursor.IsEnabled() { - r.advanceCursor(common.MapStr(mm)) - } - - return nil -} - -func (r *requester) getURL() string { - if !r.config.DateCursor.IsEnabled() { - return r.config.URL - } - - var dateStr string - if r.cursorValue == "" { - t := timeNow().UTC().Add(-r.config.DateCursor.InitialInterval) - dateStr = t.Format(r.config.DateCursor.GetDateFormat()) - } else { - dateStr = r.cursorValue - } - - url, err := url.Parse(r.config.URL) - if err != nil { - return r.config.URL - } - - q := url.Query() - - var value string - if r.config.DateCursor.ValueTemplate == nil { - value = dateStr - } else { - buf := new(bytes.Buffer) - if err := r.config.DateCursor.ValueTemplate.Execute(buf, dateStr); err != nil { - return r.config.URL - } - value = buf.String() - } - - q.Set(r.config.DateCursor.URLField, value) - - url.RawQuery = q.Encode() - - return url.String() -} - -func (r *requester) advanceCursor(m common.MapStr) { - if r.config.DateCursor.Field == "" { - r.cursorValue = time.Now().UTC().Format(r.config.DateCursor.GetDateFormat()) - return - } - - v, err := m.GetValue(r.config.DateCursor.Field) - if err != nil { - r.log.Warnf("date_cursor field: %q", err) - return - } - switch t := v.(type) { - case string: - _, err := time.Parse(r.config.DateCursor.GetDateFormat(), t) - if err != nil { - r.log.Warn("date_cursor field does not have the expected layout") - return - } - r.cursorValue = t - default: - r.log.Warn("date_cursor field must be a string, cursor will not advance") - return - } -} - -func (r *requester) loadCheckpoint(cursor cursor.Cursor) { - var nextCursorValue string - if cursor.IsNew() { - return - } - - if err := cursor.Unpack(&nextCursorValue); err != nil { - r.log.Errorf("Reset cursor position. 
Failed to read checkpoint from registry: %v", err) - return - } - - r.cursorValue = nextCursorValue -} diff --git a/x-pack/filebeat/tests/system/test_httpjson.py b/x-pack/filebeat/tests/system/test_httpjson.py index 96e41c7f782..af03d723e8a 100644 --- a/x-pack/filebeat/tests/system/test_httpjson.py +++ b/x-pack/filebeat/tests/system/test_httpjson.py @@ -609,3 +609,51 @@ def handler(): assert json.loads(output[1]["message"]) == message[1] assert json.loads(output[2]["message"]) == message[0] assert json.loads(output[3]["message"]) == message[1] + + def test_pagination(self): + """ + Test httpjson input works correctly with pagination + """ + + message = [ + {"@timestamp": "2002-10-02T15:00:00Z", "nextPageToken": "bar", "items": [{"foo": "bar"}]}, + {"@timestamp": "2002-10-02T15:00:01Z", "items": [{"bar": "bazz"}]} + ] + + times = 0 + + def handler(): + nonlocal times + if times == 1: + if request.values["page"] != "bar": + resp = jsonify({"error": "wrong page token"}) + resp.status_code = 400 + return resp + resp = jsonify(message[times]) + times += 1 + return resp + + shutdown_func = self.start_server("GET", handler) + + options = [ + "http_method: GET", + "interval: 300ms", + "url: http://localhost:5000", + "pagination.id_field: nextPageToken", + "pagination.url_field: page", + "json_objects_array: items" + ] + self.set_config(options) + + filebeat = self.start_beat() + + self.wait_until(lambda: self.output_count(lambda x: x == 2)) + + filebeat.check_kill_and_wait() + shutdown_func() + + output = self.read_output() + + assert output[0]["input.type"] == "httpjson" + assert json.loads(output[0]["message"]) == {"foo": "bar"} + assert json.loads(output[1]["message"]) == {"bar": "bazz"} From 5bd77e91f2797b56a2dc35dff38df08a4534b882 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Fri, 31 Jul 2020 12:04:39 +0200 Subject: [PATCH 05/15] Remove python tests --- x-pack/filebeat/tests/system/test_httpjson.py | 659 ------------------ 1 file changed, 659 deletions(-) delete 
mode 100644 x-pack/filebeat/tests/system/test_httpjson.py diff --git a/x-pack/filebeat/tests/system/test_httpjson.py b/x-pack/filebeat/tests/system/test_httpjson.py deleted file mode 100644 index af03d723e8a..00000000000 --- a/x-pack/filebeat/tests/system/test_httpjson.py +++ /dev/null @@ -1,659 +0,0 @@ -import json -import jinja2 -import os -import random -import sys - -from datetime import datetime -from flask import Flask, jsonify, request -from multiprocessing import Process - -sys.path.append(os.path.join(os.path.dirname(__file__), - '../../../../filebeat/tests/system')) - -from filebeat import BaseTest - - -class Test(BaseTest): - """ - Test filebeat with the httpjson input - """ - @classmethod - def setUpClass(self): - self.beat_name = "filebeat" - self.beat_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../../")) - - super(BaseTest, self).setUpClass() - - def setUp(self): - super(BaseTest, self).setUp() - - # Hack to make jinja2 have the right paths - self.template_env = jinja2.Environment( - loader=jinja2.FileSystemLoader([ - os.path.abspath(os.path.join( - self.beat_path, "../../filebeat")), - os.path.abspath(os.path.join(self.beat_path, "../../libbeat")) - ]) - ) - - def set_config(self, extra_options=[]): - """ - General function so that we do not have to define settings each time - """ - options = ["- type: httpjson", "enabled: true"] - options.extend(extra_options) - - self.render_config_template( - input_raw='\n '.join(options), - inputs=False, - ) - - def start_server(self, method, handler, ssl=False): - """ - Creates a new http test server that will respond with the given handler - """ - app = Flask(__name__) - app.app_context().push() - - app.route('/', methods=[method])(handler) - - kwargs = {} - if ssl: - kwargs = {"ssl_context": "adhoc"} - - process = Process(target=app.run, kwargs=kwargs) - - def shutdown(): - app.do_teardown_appcontext() - process.terminate() - process.join() - - process.start() - - return shutdown - - def 
test_get(self): - """ - Test httpjson input performs a simple GET request correctly. - """ - - message = {"hello": "world"} - - def handler(): - return jsonify(message) - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 0", - "url: http://localhost:5000" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 1)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - - def test_get_https(self): - """ - Test httpjson input performs a simple GET request with HTTPS correctly. - """ - - message = {"hello": "world"} - - def handler(): - return jsonify(message) - - shutdown_func = self.start_server("GET", handler, ssl=True) - - options = [ - "http_method: GET", - "interval: 0", - "url: https://localhost:5000", - "ssl.verification_mode: none" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 1)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - - def test_rate_limit_retry(self): - """ - Test httpjson input performs a retry when is rate limited. 
- """ - - message = {"hello": "world"} - - is_retry = False - - def handler(): - nonlocal is_retry - - resp = jsonify(message) - if is_retry: - return resp - - is_retry = True - resp.headers["X-Rate-Limit-Limit"] = "0" - resp.headers["X-Rate-Limit-Remaining"] = "0" - resp.headers["X-Rate-Limit-Reset"] = datetime.timestamp( - datetime.now()) - resp.status_code = 429 - - return resp - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 0", - "url: http://localhost:5000" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 1)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - - def test_error_retry(self): - """ - Test httpjson input performs a retry when the request fails. - """ - - message = {"hello": "world"} - - retry_count = 0 - - def handler(): - nonlocal retry_count - - resp = jsonify(message) - if retry_count == 2: - return resp - - retry_count += 1 - resp.status_code = random.randrange(500, 599) - - return resp - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 0", - "url: http://localhost:5000" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 1)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - - def test_array_response(self): - """ - Test httpjson input parses properly an array response. 
- """ - - message = { - "hello": [ - { - "foo": "bar", - "list": [ - {"foo": "bar"}, - {"hello": "world"} - ] - }, - { - "foo": "bar", - "list": [ - {"foo": "bar"} - ] - } - ] - } - - def handler(): - return jsonify(message) - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 0", - "url: http://localhost:5000", - "json_objects_array: hello" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 2)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message["hello"][0] - assert json.loads(output[1]["message"]) == message["hello"][1] - - def test_post(self): - """ - Test httpjson input performs a simple POST request correctly. - """ - - message = {"hello": "world"} - - def handler(): - if request.get_json() != {"test": "abc"}: - resp = jsonify({"error": "got {}".format(request.get_data())}) - resp.status_code = 400 - return resp - return jsonify(message) - - shutdown_func = self.start_server("POST", handler) - - options = [ - "http_method: POST", - "interval: 0", - "url: http://localhost:5000", - "http_request_body:", - " test: abc" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 1)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - - def test_repeated_post(self): - """ - Test httpjson input performs several POST requests correctly. 
- """ - - message = {"hello": "world"} - - def handler(): - if request.get_json() != {"test": "abc"}: - resp = jsonify({"error": "got {}".format(request.get_data())}) - resp.status_code = 400 - return resp - return jsonify(message) - - shutdown_func = self.start_server("POST", handler) - - options = [ - "http_method: POST", - "interval: 300ms", - "url: http://localhost:5000", - "http_request_body:", - " test: abc" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 3)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - assert json.loads(output[1]["message"]) == message - assert json.loads(output[2]["message"]) == message - - def test_oauth2(self): - """ - Test httpjson input performs oauth2 requests correctly. - """ - - message = {"hello": "world"} - is_oauth2_token_request = True - - def handler(): - nonlocal is_oauth2_token_request - if is_oauth2_token_request: - if request.method != "POST": - resp = jsonify({"error": "expected POST request"}) - resp.status_code = 400 - return resp - if request.values["grant_type"] != "client_credentials": - resp = jsonify({"error": "expected grant_type was client_credentials"}) - resp.status_code = 400 - return resp - if request.values["client_id"] != "a_client_id" or request.values["client_secret"] != "a_client_secret": - resp = jsonify({"error": "expected client credentials a_client_id:a_client_secret"}) - resp.status_code = 400 - return resp - if request.values["scope"] != "scope1 scope2": - resp = jsonify({"error": "expected scope was scope1+scope2"}) - resp.status_code = 400 - return resp - if request.values["param1"] != "v1": - resp = jsonify({"error": "expected param1 was v1"}) - resp.status_code = 400 - return resp - is_oauth2_token_request = False - return jsonify({"token_type": "Bearer", "expires_in": "60", 
"access_token": "abcd"}) - return jsonify(message) - - shutdown_func = self.start_server("POST", handler) - - options = [ - "http_method: POST", - "interval: 0", - "url: http://localhost:5000", - "oauth2.client.id: a_client_id", - "oauth2.client.secret: a_client_secret", - "oauth2.token_url: http://localhost:5000", - "oauth2.endpoint_params:", - " param1: v1", - "oauth2.scopes: [scope1, scope2]" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 1)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - - def test_split_events_by(self): - """ - Test httpjson input splits events by key correctly. - """ - - message = { - "hello": "world", - "embedded": { - "hello": "world", - }, - "list": [ - {"foo": "bar"}, - {"hello": "world"} - ] - } - - def handler(): - return jsonify(message) - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 0", - "url: http://localhost:5000", - "split_events_by: list" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 2)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - expected1 = { - "hello": "world", - "embedded": { - "hello": "world", - }, - "list": {"foo": "bar"} - } - - expected2 = { - "hello": "world", - "embedded": { - "hello": "world", - }, - "list": {"hello": "world"} - } - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == expected1 - assert json.loads(output[1]["message"]) == expected2 - - def test_split_events_by_not_found(self): - """ - Test httpjson input does not fail when split key is not found - """ - - message = {"hello": "world"} - - def handler(): - return jsonify(message) - - shutdown_func = 
self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 0", - "url: http://localhost:5000", - "split_events_by: list" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 1)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message - - def test_split_events_by_with_array(self): - """ - Test httpjson input generate events when splitting from a key inside a list - """ - - message = { - "objs": [ - { - "foo": "bar", - "list": [ - {"bar": "baz"}, - {"one": "two"} - ] - }, - {"foo": "bar"} - ] - } - - def handler(): - return jsonify(message) - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 0", - "url: http://localhost:5000", - "json_objects_array: objs", - "split_events_by: list" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 3)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - expected1 = { - "foo": "bar", - "list": {"bar": "baz"} - } - - expected2 = { - "foo": "bar", - "list": {"one": "two"} - } - - expected3 = {"foo": "bar"} - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == expected1 - assert json.loads(output[1]["message"]) == expected2 - assert json.loads(output[2]["message"]) == expected3 - - def test_cursor(self): - """ - Test httpjson input works correctly with a date cursor - """ - - message = [ - {"@timestamp": "2002-10-02T15:00:00Z", "foo": "bar"}, - {"@timestamp": "2002-10-02T15:00:01Z", "foo": "bar"} - ] - - times = 0 - - def handler(): - nonlocal times - if times == 1: - if request.values["$filter"] != "alertCreationTime ge 2002-10-02T15:00:01Z": - resp = jsonify({"error": "wrong filter"}) - resp.status_code = 400 - 
return resp - times += 1 - return jsonify(message) - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 300ms", - "url: http://localhost:5000", - "date_cursor.field: \"@timestamp\"", - "date_cursor.url_field: $filter", - "date_cursor.value_template: alertCreationTime ge {{.}}", - "date_cursor.initial_interval: 10m", - "date_cursor.date_format: 2006-01-02T15:04:05Z", - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 4)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == message[0] - assert json.loads(output[1]["message"]) == message[1] - assert json.loads(output[2]["message"]) == message[0] - assert json.loads(output[3]["message"]) == message[1] - - def test_pagination(self): - """ - Test httpjson input works correctly with pagination - """ - - message = [ - {"@timestamp": "2002-10-02T15:00:00Z", "nextPageToken": "bar", "items": [{"foo": "bar"}]}, - {"@timestamp": "2002-10-02T15:00:01Z", "items": [{"bar": "bazz"}]} - ] - - times = 0 - - def handler(): - nonlocal times - if times == 1: - if request.values["page"] != "bar": - resp = jsonify({"error": "wrong page token"}) - resp.status_code = 400 - return resp - resp = jsonify(message[times]) - times += 1 - return resp - - shutdown_func = self.start_server("GET", handler) - - options = [ - "http_method: GET", - "interval: 300ms", - "url: http://localhost:5000", - "pagination.id_field: nextPageToken", - "pagination.url_field: page", - "json_objects_array: items" - ] - self.set_config(options) - - filebeat = self.start_beat() - - self.wait_until(lambda: self.output_count(lambda x: x == 2)) - - filebeat.check_kill_and_wait() - shutdown_func() - - output = self.read_output() - - assert output[0]["input.type"] == "httpjson" - assert json.loads(output[0]["message"]) == 
{"foo": "bar"} - assert json.loads(output[1]["message"]) == {"bar": "bazz"} From 723d87c618581434f790458175bafa5f334e021c Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Fri, 31 Jul 2020 12:05:08 +0200 Subject: [PATCH 06/15] Do not fail if there is no next page --- x-pack/filebeat/input/httpjson/pagination.go | 8 ++++---- x-pack/filebeat/input/httpjson/requester.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/pagination.go b/x-pack/filebeat/input/httpjson/pagination.go index e334761c782..330528a82df 100644 --- a/x-pack/filebeat/input/httpjson/pagination.go +++ b/x-pack/filebeat/input/httpjson/pagination.go @@ -47,6 +47,10 @@ func (p *pagination) nextRequestInfo(ri *requestInfo, response response, lastObj var err error // Pagination control using HTTP Body fields if err = p.setRequestInfoFromBody(response.body, lastObj, ri); err != nil { + // if the field is not found, there is no next page + if errors.Cause(err) == common.ErrKeyNotFound { + return ri, false, nil + } return ri, false, err } @@ -90,10 +94,6 @@ func (p *pagination) setRequestInfoFromBody(response, last common.MapStr, ri *re v, err = response.GetValue(p.idField) } - if err == common.ErrKeyNotFound { - return nil - } - if err != nil { return errors.Wrapf(err, "failed to retrieve id_field for pagination") } diff --git a/x-pack/filebeat/input/httpjson/requester.go b/x-pack/filebeat/input/httpjson/requester.go index c5952f860cd..af169cfe137 100644 --- a/x-pack/filebeat/input/httpjson/requester.go +++ b/x-pack/filebeat/input/httpjson/requester.go @@ -119,7 +119,7 @@ func (r *requester) processHTTPRequest(ctx context.Context, publisher stateless. 
if err = json.Unmarshal(responseData, &m); err != nil { r.log.Debug("failed to unmarshal http.response.body", string(responseData)) - return errors.Wrapf(err, "failed to unmarshal http.response.body") + return errors.Wrapf(err, "failed to unmarshal http.response.body %q", string(responseData)) } switch obj := m.(type) { From 73a5addaec1de8cb986e8efecb6cc35f53e7e0f8 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Fri, 31 Jul 2020 12:05:27 +0200 Subject: [PATCH 07/15] Refactor go integration tests to work with v2 input --- .../filebeat/input/httpjson/httpjson_test.go | 414 ++++++++++++++++++ 1 file changed, 414 insertions(+) create mode 100644 x-pack/filebeat/input/httpjson/httpjson_test.go diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/httpjson_test.go new file mode 100644 index 00000000000..39eb77a13a6 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/httpjson_test.go @@ -0,0 +1,414 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package httpjson + +import ( + "context" + "fmt" + "io/ioutil" + "math/rand" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +func TestHTTPJSONInput(t *testing.T) { + testCases := []struct { + name string + setup func(map[string]interface{}) interface{} + teardown func(interface{}) + baseConfig map[string]interface{} + ssl bool + handler http.HandlerFunc + expected []string + duration time.Duration + }{ + { + name: "Test simple GET request", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + }, + { + name: "Test simple HTTPS GET request", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + "ssl.verification_mode": "none", + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + ssl: true, + }, + { + name: "Test request honors rate limit", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + "rate_limit.limit": "X-Rate-Limit-Limit", + "rate_limit.remaining": "X-Rate-Limit-Remaining", + "rate_limit.reset": "X-Rate-Limit-Reset", + }, + handler: rateLimitHandler(), + expected: []string{`{"hello":"world"}`}, + }, + { + name: "Test request retries when failed", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + }, + handler: retryHandler(), + expected: []string{`{"hello":"world"}`}, + }, + { + name: "Test POST request with body", + baseConfig: map[string]interface{}{ + "http_method": "POST", + "interval": 0, + "http_request_body": map[string]interface{}{ + "test": "abc", + }, + }, + handler: defaultHandler("POST", 
`{"test":"abc"}`), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + }, + { + name: "Test repeated POST requests", + baseConfig: map[string]interface{}{ + "http_method": "POST", + "interval": "400ms", + }, + handler: defaultHandler("POST", ""), + expected: []string{ + `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, + `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, + }, + duration: 700 * time.Millisecond, + }, + { + name: "Test json objects array", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + "json_objects_array": "hello", + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"world":"moon"}`, `{"space":[{"cake":"pumpkin"}]}`}, + }, + { + name: "Test split events by", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + "split_events_by": "hello", + }, + handler: defaultHandler("GET", ""), + expected: []string{ + `{"hello":{"world":"moon"}}`, + `{"hello":{"space":[{"cake":"pumpkin"}]}}`, + }, + }, + { + name: "Test split events by with array", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + "split_events_by": "space", + "json_objects_array": "hello", + }, + handler: defaultHandler("GET", ""), + expected: []string{ + `{"world":"moon"}`, + `{"space":{"cake":"pumpkin"}}`, + }, + }, + { + name: "Test split events by not found", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + "split_events_by": "unknwown", + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + }, + { + name: "Test date cursor", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": "400ms", + "date_cursor.field": "@timestamp", + "date_cursor.url_field": "$filter", + "date_cursor.value_template": "alertCreationTime ge {{.}}", + "date_cursor.initial_interval": "10m", + "date_cursor.date_format": 
"2006-01-02T15:04:05Z", + }, + handler: dateCursorHandler(), + expected: []string{ + `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, + }, + duration: 700 * time.Millisecond, + setup: func(map[string]interface{}) interface{} { + timeNow = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") + return t + } + return nil + }, + }, + { + name: "Test pagination", + baseConfig: map[string]interface{}{ + "http_method": "GET", + "interval": 0, + "pagination.id_field": "nextPageToken", + "pagination.url_field": "page", + "json_objects_array": "items", + }, + handler: paginationHandler(), + expected: []string{`{"foo":"bar"}`, `{"foo":"bar"}`}, + }, + { + name: "Test oauth2", + baseConfig: map[string]interface{}{ + "http_method": "POST", + "interval": "0", + "oauth2.client.id": "a_client_id", + "oauth2.client.secret": "a_client_secret", + "oauth2.endpoint_params": map[string]interface{}{ + "param1": "v1", + }, + "oauth2.scopes": []string{"scope1", "scope2"}, + }, + setup: func(config map[string]interface{}) interface{} { + server := httptest.NewServer(http.HandlerFunc(oauth2TokenHandler)) + config["oauth2.token_url"] = server.URL + return server + }, + teardown: func(i interface{}) { + server := i.(*httptest.Server) + server.Close() + }, + handler: oauth2Handler, + expected: []string{`{"hello": "world"}`}, + }, + } + + for _, testCase := range testCases { + tc := testCase + t.Run(tc.name, func(t *testing.T) { + server := func() *httptest.Server { + if tc.ssl { + return httptest.NewTLSServer(tc.handler) + } + return httptest.NewServer(tc.handler) + }() + defer server.Close() + + tc.baseConfig["url"] = server.URL + + var setupResult interface{} + if tc.setup != nil { + setupResult = tc.setup(tc.baseConfig) + } + if tc.teardown != nil { + defer tc.teardown(setupResult) + } + + cfg := common.MustNewConfigFrom(tc.baseConfig) + + input, err := configure(cfg) + + assert.NoError(t, err) + 
assert.Equal(t, "httpjson", input.Name()) + assert.NoError(t, input.Test(v2.TestContext{})) + + pub := &publisher{} + + ctx, cancel := newV2Context(tc.duration) + defer cancel() + + assert.NoError(t, input.Run(ctx, pub)) + + assert.Equal(t, len(tc.expected), len(pub.events)) + for i, e := range pub.events { + val, err := e.Fields.GetValue("message") + assert.NoError(t, err) + assert.JSONEq(t, tc.expected[i], val.(string)) + } + }) + } +} + +func newV2Context(d time.Duration) (v2.Context, func()) { + ctx, cancel := func() (context.Context, func()) { + if d == 0 { + return context.WithCancel(context.Background()) + } + return context.WithTimeout(context.Background(), d) + }() + return v2.Context{ + Logger: logp.NewLogger("httpjson_test"), + ID: "test_id", + Cancelation: ctx, + }, cancel +} + +type publisher struct { + events []beat.Event +} + +func (p *publisher) Publish(e beat.Event) { + p.events = append(p.events, e) +} + +func defaultHandler(expectedMethod, expectedBody string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + msg := `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}` + switch { + case r.Method != expectedMethod: + w.WriteHeader(http.StatusBadRequest) + msg = fmt.Sprintf(`{"error":"expected method was %q"}`, expectedMethod) + case expectedBody != "": + body, _ := ioutil.ReadAll(r.Body) + r.Body.Close() + if expectedBody != string(body) { + w.WriteHeader(http.StatusBadRequest) + msg = fmt.Sprintf(`{"error":"expected body was %q"}`, expectedBody) + } + } + + _, _ = w.Write([]byte(msg)) + } +} + +func rateLimitHandler() http.HandlerFunc { + var isRetry bool + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + if isRetry { + _, _ = w.Write([]byte(`{"hello":"world"}`)) + return + } + w.Header().Set("X-Rate-Limit-Limit", "0") + w.Header().Set("X-Rate-Limit-Remaining", "0") + 
w.Header().Set("X-Rate-Limit-Reset", fmt.Sprint(time.Now().Unix())) + w.WriteHeader(http.StatusTooManyRequests) + isRetry = true + _, _ = w.Write([]byte(`{"error":"too many requests"}`)) + } +} + +func retryHandler() http.HandlerFunc { + count := 0 + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + if count == 2 { + _, _ = w.Write([]byte(`{"hello":"world"}`)) + return + } + w.WriteHeader(rand.Intn(100) + 500) + count += 1 + _, _ = w.Write([]byte(`{"error":"failed"}`)) + } +} + +func oauth2TokenHandler(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + _ = r.ParseForm() + switch { + case r.Method != "POST": + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong method"}`)) + case r.FormValue("grant_type") != "client_credentials": + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong grant_type"}`)) + case r.FormValue("client_id") != "a_client_id": + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong client_id"}`)) + case r.FormValue("client_secret") != "a_client_secret": + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong client_secret"}`)) + case r.FormValue("scope") != "scope1 scope2": + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong scope"}`)) + case r.FormValue("param1") != "v1": + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong param1"}`)) + default: + _, _ = w.Write([]byte(`{"token_type": "Bearer", "expires_in": "60", "access_token": "abcd"}`)) + } +} + +func oauth2Handler(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + switch { + case r.Method != "POST": + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong method"}`)) + case r.Header.Get("Authorization") != "Bearer abcd": + w.WriteHeader(http.StatusBadRequest) + _, _ 
= w.Write([]byte(`{"error":"wrong bearer"}`)) + default: + _, _ = w.Write([]byte(`{"hello":"world"}`)) + } +} + +func dateCursorHandler() http.HandlerFunc { + var count int + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + switch count { + case 0: + if r.URL.Query().Get("$filter") != "alertCreationTime ge 2002-10-02T14:50:00Z" { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong initial cursor value"`)) + return + } + _, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`)) + case 1: + if r.URL.Query().Get("$filter") != "alertCreationTime ge 2002-10-02T15:00:00Z" { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong cursor value"`)) + return + } + _, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`)) + } + count += 1 + } +} + +func paginationHandler() http.HandlerFunc { + var count int + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + switch count { + case 0: + _, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:00Z","nextPageToken":"bar","items":[{"foo":"bar"}]}`)) + case 1: + if r.URL.Query().Get("page") != "bar" { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong page token value"}`)) + return + } + _, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:01Z","items":[{"foo":"bar"}]}`)) + } + count += 1 + } +} From 0b7d9d485207cd68f3e639ceaf85ff18f67811ea Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 17 Aug 2020 10:59:03 +0200 Subject: [PATCH 08/15] Do suggested changes to input and tests --- CHANGELOG.next.asciidoc | 2 - .../filebeat/input/httpjson/httpjson_test.go | 176 +++++++++++------- x-pack/filebeat/input/httpjson/input.go | 23 +-- 3 files changed, 115 insertions(+), 86 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index b690d573ba3..1778eace862 100644 --- 
a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -531,8 +531,6 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Add event.ingested to all Filebeat modules. {pull}20386[20386] - Return error when log harvester tries to open a named pipe. {issue}18682[18682] {pull}20450[20450] - Avoid goroutine leaks in Filebeat readers. {issue}19193[19193] {pull}20455[20455] - -- Use new cursor input for httpjson input {pull}20226[20226] - Convert httpjson to v2 input {pull}20226[20226] *Heartbeat* diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/httpjson_test.go index 39eb77a13a6..69ffbfc8f7f 100644 --- a/x-pack/filebeat/input/httpjson/httpjson_test.go +++ b/x-pack/filebeat/input/httpjson/httpjson_test.go @@ -17,24 +17,24 @@ import ( "github.com/stretchr/testify/assert" v2 "github.com/elastic/beats/v7/filebeat/input/v2" - "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/logp" + beattest "github.com/elastic/beats/v7/libbeat/publisher/testing" ) func TestHTTPJSONInput(t *testing.T) { testCases := []struct { - name string - setup func(map[string]interface{}) interface{} - teardown func(interface{}) - baseConfig map[string]interface{} - ssl bool - handler http.HandlerFunc - expected []string - duration time.Duration + name string + setupServer func(*testing.T, http.HandlerFunc, map[string]interface{}) + baseConfig map[string]interface{} + handler http.HandlerFunc + expected []string + expectedError string + duration time.Duration }{ { - name: "Test simple GET request", + name: "Test simple GET request", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -43,7 +43,8 @@ func TestHTTPJSONInput(t *testing.T) { expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, }, { - name: "Test simple HTTPS GET request", + name: "Test 
simple HTTPS GET request", + setupServer: newTestServer(httptest.NewTLSServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -51,10 +52,10 @@ func TestHTTPJSONInput(t *testing.T) { }, handler: defaultHandler("GET", ""), expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, - ssl: true, }, { - name: "Test request honors rate limit", + name: "Test request honors rate limit", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -66,7 +67,8 @@ func TestHTTPJSONInput(t *testing.T) { expected: []string{`{"hello":"world"}`}, }, { - name: "Test request retries when failed", + name: "Test request retries when failed", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -75,7 +77,8 @@ func TestHTTPJSONInput(t *testing.T) { expected: []string{`{"hello":"world"}`}, }, { - name: "Test POST request with body", + name: "Test POST request with body", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "POST", "interval": 0, @@ -87,7 +90,8 @@ func TestHTTPJSONInput(t *testing.T) { expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, }, { - name: "Test repeated POST requests", + name: "Test repeated POST requests", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "POST", "interval": "400ms", @@ -100,7 +104,8 @@ func TestHTTPJSONInput(t *testing.T) { duration: 700 * time.Millisecond, }, { - name: "Test json objects array", + name: "Test json objects array", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -110,7 +115,8 @@ func TestHTTPJSONInput(t *testing.T) { expected: []string{`{"world":"moon"}`, `{"space":[{"cake":"pumpkin"}]}`}, }, { - name: "Test split events by", + name: 
"Test split events by", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -123,7 +129,8 @@ func TestHTTPJSONInput(t *testing.T) { }, }, { - name: "Test split events by with array", + name: "Test split events by with array", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -137,7 +144,8 @@ func TestHTTPJSONInput(t *testing.T) { }, }, { - name: "Test split events by not found", + name: "Test split events by not found", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -148,6 +156,17 @@ func TestHTTPJSONInput(t *testing.T) { }, { name: "Test date cursor", + setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { + // mock timeNow func to return a fixed value + timeNow = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") + return t + } + + server := httptest.NewServer(h) + config["url"] = server.URL + t.Cleanup(server.Close) + }, baseConfig: map[string]interface{}{ "http_method": "GET", "interval": "400ms", @@ -163,16 +182,10 @@ func TestHTTPJSONInput(t *testing.T) { `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, }, duration: 700 * time.Millisecond, - setup: func(map[string]interface{}) interface{} { - timeNow = func() time.Time { - t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") - return t - } - return nil - }, }, { - name: "Test pagination", + name: "Test pagination", + setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "GET", "interval": 0, @@ -183,8 +196,26 @@ func TestHTTPJSONInput(t *testing.T) { handler: paginationHandler(), expected: []string{`{"foo":"bar"}`, `{"foo":"bar"}`}, }, + { + name: "Test loop breaks on irrecoverable failure", + setupServer: newTestServer(httptest.NewServer), + baseConfig: 
map[string]interface{}{ + "http_method": "GET", + "interval": "300ms", + "retry.max_attempts": 1, + }, + handler: failAfterFirstAttemptHandler(), + expectedError: "giving up after 2 attempts", + expected: []string{`{"hello":"world"}`}, + }, { name: "Test oauth2", + setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { + server := httptest.NewServer(h) + config["url"] = server.URL + config["oauth2.token_url"] = server.URL + "/token" + t.Cleanup(server.Close) + }, baseConfig: map[string]interface{}{ "http_method": "POST", "interval": "0", @@ -195,15 +226,6 @@ func TestHTTPJSONInput(t *testing.T) { }, "oauth2.scopes": []string{"scope1", "scope2"}, }, - setup: func(config map[string]interface{}) interface{} { - server := httptest.NewServer(http.HandlerFunc(oauth2TokenHandler)) - config["oauth2.token_url"] = server.URL - return server - }, - teardown: func(i interface{}) { - server := i.(*httptest.Server) - server.Close() - }, handler: oauth2Handler, expected: []string{`{"hello": "world"}`}, }, @@ -212,23 +234,7 @@ func TestHTTPJSONInput(t *testing.T) { for _, testCase := range testCases { tc := testCase t.Run(tc.name, func(t *testing.T) { - server := func() *httptest.Server { - if tc.ssl { - return httptest.NewTLSServer(tc.handler) - } - return httptest.NewServer(tc.handler) - }() - defer server.Close() - - tc.baseConfig["url"] = server.URL - - var setupResult interface{} - if tc.setup != nil { - setupResult = tc.setup(tc.baseConfig) - } - if tc.teardown != nil { - defer tc.teardown(setupResult) - } + tc.setupServer(t, tc.handler, tc.baseConfig) cfg := common.MustNewConfigFrom(tc.baseConfig) @@ -238,23 +244,44 @@ func TestHTTPJSONInput(t *testing.T) { assert.Equal(t, "httpjson", input.Name()) assert.NoError(t, input.Test(v2.TestContext{})) - pub := &publisher{} + pub := beattest.NewChanClient(len(tc.expected)) + t.Cleanup(func() { _ = pub.Close() }) ctx, cancel := newV2Context(tc.duration) - defer cancel() + t.Cleanup(cancel) + + err = 
input.Run(ctx, pub) + switch tc.expectedError { + case "": + assert.NoError(t, err) + default: + // retryable client errors use dynamic method / host / port in the message + // and no custom type. There is no other easy way to test for a specific one + assert.Contains(t, err.Error(), tc.expectedError) + } - assert.NoError(t, input.Run(ctx, pub)) + assert.Equal(t, len(tc.expected), len(pub.Channel)) + for _, e := range tc.expected { + got := pub.ReceiveEvent() - assert.Equal(t, len(tc.expected), len(pub.events)) - for i, e := range pub.events { - val, err := e.Fields.GetValue("message") + val, err := got.Fields.GetValue("message") assert.NoError(t, err) - assert.JSONEq(t, tc.expected[i], val.(string)) + assert.JSONEq(t, e, val.(string)) } }) } } +func newTestServer( + newServer func(http.Handler) *httptest.Server, +) func(*testing.T, http.HandlerFunc, map[string]interface{}) { + return func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { + server := newServer(h) + config["url"] = server.URL + t.Cleanup(server.Close) + } +} + func newV2Context(d time.Duration) (v2.Context, func()) { ctx, cancel := func() (context.Context, func()) { if d == 0 { @@ -269,14 +296,6 @@ func newV2Context(d time.Duration) (v2.Context, func()) { }, cancel } -type publisher struct { - events []beat.Event -} - -func (p *publisher) Publish(e beat.Event) { - p.events = append(p.events, e) -} - func defaultHandler(expectedMethod, expectedBody string) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { w.Header().Set("content-type", "application/json") @@ -325,7 +344,19 @@ func retryHandler() http.HandlerFunc { } w.WriteHeader(rand.Intn(100) + 500) count += 1 - _, _ = w.Write([]byte(`{"error":"failed"}`)) + } +} + +func failAfterFirstAttemptHandler() http.HandlerFunc { + count := 0 + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json") + if count == 1 { + w.WriteHeader(rand.Intn(100) + 500) + return + } + 
_, _ = w.Write([]byte(`{"hello":"world"}`)) + count += 1 } } @@ -357,6 +388,11 @@ func oauth2TokenHandler(w http.ResponseWriter, r *http.Request) { } func oauth2Handler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/token" { + oauth2TokenHandler(w, r) + return + } + w.Header().Set("content-type", "application/json") switch { case r.Method != "POST": diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index fa9343084cd..7bc06c420cf 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -24,6 +24,7 @@ import ( "github.com/elastic/beats/v7/libbeat/feature" "github.com/elastic/beats/v7/libbeat/logp" "github.com/elastic/go-concert/ctxtool" + "github.com/elastic/go-concert/timed" ) const ( @@ -158,21 +159,15 @@ func (in *httpJSONInput) Run(ctx v2.Context, publisher stateless.Publisher) erro err = requester.processHTTPRequest(stdCtx, publisher) if err == nil && in.config.Interval > 0 { - ticker := time.NewTicker(in.config.Interval) - defer ticker.Stop() - for { - select { - case <-stdCtx.Done(): - log.Info("Context done.") - return nil - case <-ticker.C: - log.Info("Process another repeated request.") - err = requester.processHTTPRequest(stdCtx, publisher) - if err != nil { - return err - } + periodCtx, periodCancel := context.WithCancel(stdCtx) + + timed.Periodic(periodCtx, in.config.Interval, func() { + log.Info("Process another repeated request.") + err = requester.processHTTPRequest(stdCtx, publisher) + if err != nil { + periodCancel() } - } + }) } return err From 320610bbaab3bcc75c2e32c4e45d281d7b64dbc4 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Tue, 18 Aug 2020 09:19:00 +0200 Subject: [PATCH 09/15] Update time.Periodic call with error return --- x-pack/filebeat/input/httpjson/input.go | 31 ++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/input.go 
b/x-pack/filebeat/input/httpjson/input.go index 7bc06c420cf..6afd171d1e5 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -157,17 +157,26 @@ func (in *httpJSONInput) Run(ctx v2.Context, publisher stateless.Publisher) erro log, ) - err = requester.processHTTPRequest(stdCtx, publisher) - if err == nil && in.config.Interval > 0 { - periodCtx, periodCancel := context.WithCancel(stdCtx) - - timed.Periodic(periodCtx, in.config.Interval, func() { - log.Info("Process another repeated request.") - err = requester.processHTTPRequest(stdCtx, publisher) - if err != nil { - periodCancel() - } - }) + if err := requester.processHTTPRequest(stdCtx, publisher); err != nil { + return err + } + + if in.config.Interval == 0 { + return nil + } + + err = timed.Periodic(stdCtx, in.config.Interval, func() error { + log.Info("Process another repeated request.") + return requester.processHTTPRequest(stdCtx, publisher) + }) + + if err == nil { + return nil + } + + if err == context.Canceled || err == context.DeadlineExceeded { + log.Infof("Context done: %v", err) + return nil } return err From 852a344cb71d8c3650517d04b4f81f78272ac359 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Tue, 18 Aug 2020 15:19:29 +0200 Subject: [PATCH 10/15] Change test duration values --- x-pack/filebeat/input/httpjson/httpjson_test.go | 8 ++++---- x-pack/filebeat/input/httpjson/input.go | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/httpjson_test.go index 69ffbfc8f7f..2789ffc52f1 100644 --- a/x-pack/filebeat/input/httpjson/httpjson_test.go +++ b/x-pack/filebeat/input/httpjson/httpjson_test.go @@ -94,14 +94,14 @@ func TestHTTPJSONInput(t *testing.T) { setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "POST", - "interval": "400ms", + "interval": "500ms", }, handler: defaultHandler("POST", ""), expected: 
[]string{ `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, }, - duration: 700 * time.Millisecond, + duration: 900 * time.Millisecond, }, { name: "Test json objects array", @@ -169,7 +169,7 @@ func TestHTTPJSONInput(t *testing.T) { }, baseConfig: map[string]interface{}{ "http_method": "GET", - "interval": "400ms", + "interval": "500ms", "date_cursor.field": "@timestamp", "date_cursor.url_field": "$filter", "date_cursor.value_template": "alertCreationTime ge {{.}}", @@ -181,7 +181,7 @@ func TestHTTPJSONInput(t *testing.T) { `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, }, - duration: 700 * time.Millisecond, + duration: 900 * time.Millisecond, }, { name: "Test pagination", diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index 6afd171d1e5..bdf9bb12838 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -13,6 +13,7 @@ import ( "time" "github.com/hashicorp/go-retryablehttp" + "github.com/pkg/errors" "go.uber.org/zap" v2 "github.com/elastic/beats/v7/filebeat/input/v2" @@ -174,7 +175,8 @@ func (in *httpJSONInput) Run(ctx v2.Context, publisher stateless.Publisher) erro return nil } - if err == context.Canceled || err == context.DeadlineExceeded { + switch errors.Cause(err) { + case context.Canceled, context.DeadlineExceeded: log.Infof("Context done: %v", err) return nil } From 200daac7b2b9dd1c314cf122076d9789e398e7c2 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Wed, 19 Aug 2020 09:27:18 +0200 Subject: [PATCH 11/15] Separate sync test case --- .../filebeat/input/httpjson/httpjson_test.go | 120 +++++++++++------- 1 file changed, 74 insertions(+), 46 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/httpjson_test.go index 2789ffc52f1..07be72f7133 100644 --- 
a/x-pack/filebeat/input/httpjson/httpjson_test.go +++ b/x-pack/filebeat/input/httpjson/httpjson_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "golang.org/x/sync/errgroup" v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/common" @@ -24,13 +25,11 @@ import ( func TestHTTPJSONInput(t *testing.T) { testCases := []struct { - name string - setupServer func(*testing.T, http.HandlerFunc, map[string]interface{}) - baseConfig map[string]interface{} - handler http.HandlerFunc - expected []string - expectedError string - duration time.Duration + name string + setupServer func(*testing.T, http.HandlerFunc, map[string]interface{}) + baseConfig map[string]interface{} + handler http.HandlerFunc + expected []string }{ { name: "Test simple GET request", @@ -94,14 +93,13 @@ func TestHTTPJSONInput(t *testing.T) { setupServer: newTestServer(httptest.NewServer), baseConfig: map[string]interface{}{ "http_method": "POST", - "interval": "500ms", + "interval": "100ms", }, handler: defaultHandler("POST", ""), expected: []string{ `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, }, - duration: 900 * time.Millisecond, }, { name: "Test json objects array", @@ -169,7 +167,7 @@ func TestHTTPJSONInput(t *testing.T) { }, baseConfig: map[string]interface{}{ "http_method": "GET", - "interval": "500ms", + "interval": "100ms", "date_cursor.field": "@timestamp", "date_cursor.url_field": "$filter", "date_cursor.value_template": "alertCreationTime ge {{.}}", @@ -181,7 +179,6 @@ func TestHTTPJSONInput(t *testing.T) { `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, }, - duration: 900 * time.Millisecond, }, { name: "Test pagination", @@ -196,18 +193,6 @@ func TestHTTPJSONInput(t *testing.T) { handler: paginationHandler(), expected: []string{`{"foo":"bar"}`, `{"foo":"bar"}`}, }, - { - name: "Test loop 
breaks on irrecoverable failure", - setupServer: newTestServer(httptest.NewServer), - baseConfig: map[string]interface{}{ - "http_method": "GET", - "interval": "300ms", - "retry.max_attempts": 1, - }, - handler: failAfterFirstAttemptHandler(), - expectedError: "giving up after 2 attempts", - expected: []string{`{"hello":"world"}`}, - }, { name: "Test oauth2", setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { @@ -247,31 +232,79 @@ func TestHTTPJSONInput(t *testing.T) { pub := beattest.NewChanClient(len(tc.expected)) t.Cleanup(func() { _ = pub.Close() }) - ctx, cancel := newV2Context(tc.duration) + ctx, cancel := newV2Context() t.Cleanup(cancel) - err = input.Run(ctx, pub) - switch tc.expectedError { - case "": - assert.NoError(t, err) - default: - // retryable client errors use dynamic method / host / port in the message - // and no custom type. There is no other easy way to test for a specific one - assert.Contains(t, err.Error(), tc.expectedError) - } + var g errgroup.Group + g.Go(func() error { return input.Run(ctx, pub) }) - assert.Equal(t, len(tc.expected), len(pub.Channel)) - for _, e := range tc.expected { - got := pub.ReceiveEvent() + timeout := time.NewTimer(5 * time.Second) + t.Cleanup(func() { _ = timeout.Stop() }) - val, err := got.Fields.GetValue("message") - assert.NoError(t, err) - assert.JSONEq(t, e, val.(string)) + var receivedCount int + wait: + for { + select { + case <-timeout.C: + t.Errorf("timed out waiting for %d events", len(tc.expected)) + return + case got := <-pub.Channel: + val, err := got.Fields.GetValue("message") + assert.NoError(t, err) + assert.JSONEq(t, tc.expected[receivedCount], val.(string)) + receivedCount += 1 + if receivedCount == len(tc.expected) { + cancel() + break wait + } + } } + assert.NoError(t, g.Wait()) }) } } +func TestLoopBreaksOnIrrecoverableFailure(t *testing.T) { + baseConfig := map[string]interface{}{ + "http_method": "GET", + "interval": "100ms", + "retry.max_attempts": 1, + } 
+ + expected := `{"hello":"world"}` + + setupServer := newTestServer(httptest.NewServer) + + setupServer(t, failAfterFirstAttemptHandler(), baseConfig) + + cfg := common.MustNewConfigFrom(baseConfig) + + input, err := configure(cfg) + + assert.NoError(t, err) + assert.Equal(t, "httpjson", input.Name()) + assert.NoError(t, input.Test(v2.TestContext{})) + + pub := beattest.NewChanClient(len(expected)) + t.Cleanup(func() { _ = pub.Close() }) + + ctx, cancel := newV2Context() + t.Cleanup(cancel) + + if err := input.Run(ctx, pub); assert.Error(t, err) { + // retryable client errors use dynamic method / host / port in the message + // and no custom type. There is no other easy way to test for a specific one + assert.Contains(t, err.Error(), "giving up after 2 attempts") + } + + if assert.Equal(t, 1, len(pub.Channel)) { + got := pub.ReceiveEvent() + val, err := got.Fields.GetValue("message") + assert.NoError(t, err) + assert.JSONEq(t, expected, val.(string)) + } +} + func newTestServer( newServer func(http.Handler) *httptest.Server, ) func(*testing.T, http.HandlerFunc, map[string]interface{}) { @@ -282,13 +315,8 @@ func newTestServer( } } -func newV2Context(d time.Duration) (v2.Context, func()) { - ctx, cancel := func() (context.Context, func()) { - if d == 0 { - return context.WithCancel(context.Background()) - } - return context.WithTimeout(context.Background(), d) - }() +func newV2Context() (v2.Context, func()) { + ctx, cancel := context.WithCancel(context.Background()) return v2.Context{ Logger: logp.NewLogger("httpjson_test"), ID: "test_id", From 25234dda5a668e1c2a11fe1fcbd34be26fcb9f36 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Wed, 19 Aug 2020 10:11:16 +0200 Subject: [PATCH 12/15] Create custon url config type --- x-pack/filebeat/input/httpjson/config.go | 18 ++++++++++++++++- x-pack/filebeat/input/httpjson/config_test.go | 11 ++++++++++ x-pack/filebeat/input/httpjson/date_cursor.go | 20 +++++++------------ .../filebeat/input/httpjson/httpjson_test.go | 8 
++++++++ x-pack/filebeat/input/httpjson/input.go | 14 ++++--------- 5 files changed, 47 insertions(+), 24 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/config.go b/x-pack/filebeat/input/httpjson/config.go index 2be0eb6f211..e384071e3a8 100644 --- a/x-pack/filebeat/input/httpjson/config.go +++ b/x-pack/filebeat/input/httpjson/config.go @@ -5,6 +5,7 @@ package httpjson import ( + "net/url" "regexp" "strings" "text/template" @@ -35,7 +36,7 @@ type config struct { RetryWaitMin time.Duration `config:"retry.wait_min"` RetryWaitMax time.Duration `config:"retry.wait_max"` TLS *tlscommon.Config `config:"ssl"` - URL string `config:"url" validate:"required"` + URL *URL `config:"url" validate:"required"` DateCursor *DateCursor `config:"date_cursor"` } @@ -92,6 +93,21 @@ func (t *Template) Unpack(in string) error { return nil } +type URL struct { + *url.URL +} + +func (u *URL) Unpack(in string) error { + parsed, err := url.Parse(in) + if err != nil { + return err + } + + *u = URL{URL: parsed} + + return nil +} + // IsEnabled returns true if the `enable` field is set to true in the yaml. 
func (dc *DateCursor) IsEnabled() bool { return dc != nil && (dc.Enabled == nil || *dc.Enabled) diff --git a/x-pack/filebeat/input/httpjson/config_test.go b/x-pack/filebeat/input/httpjson/config_test.go index c3486aedda4..148f136f08d 100644 --- a/x-pack/filebeat/input/httpjson/config_test.go +++ b/x-pack/filebeat/input/httpjson/config_test.go @@ -11,6 +11,7 @@ import ( "time" "github.com/pkg/errors" + "github.com/stretchr/testify/assert" "golang.org/x/oauth2/google" "github.com/elastic/beats/v7/libbeat/common" @@ -110,6 +111,16 @@ func TestConfigValidationCase7(t *testing.T) { } } +func TestConfigMustFailWithInvalidURL(t *testing.T) { + m := map[string]interface{}{ + "url": "::invalid::", + } + cfg := common.MustNewConfigFrom(m) + conf := defaultConfig() + err := cfg.Unpack(&conf) + assert.EqualError(t, err, `parse "::invalid::": missing protocol scheme accessing 'url'`) +} + func TestConfigOauth2Validation(t *testing.T) { cases := []struct { name string diff --git a/x-pack/filebeat/input/httpjson/date_cursor.go b/x-pack/filebeat/input/httpjson/date_cursor.go index 1f18673d181..2a9db44bd2a 100644 --- a/x-pack/filebeat/input/httpjson/date_cursor.go +++ b/x-pack/filebeat/input/httpjson/date_cursor.go @@ -17,7 +17,7 @@ type dateCursor struct { log *logp.Logger enabled bool field string - url string + url url.URL urlField string initialInterval time.Duration dateFormat string @@ -29,7 +29,7 @@ type dateCursor struct { func newDateCursorFromConfig(config config, log *logp.Logger) *dateCursor { c := &dateCursor{ enabled: config.DateCursor.IsEnabled(), - url: config.URL, + url: *config.URL.URL, } if !c.enabled { @@ -38,7 +38,6 @@ func newDateCursorFromConfig(config config, log *logp.Logger) *dateCursor { c.log = log c.field = config.DateCursor.Field - c.url = config.URL c.urlField = config.DateCursor.URLField c.initialInterval = config.DateCursor.InitialInterval c.dateFormat = config.DateCursor.GetDateFormat() @@ -49,7 +48,7 @@ func newDateCursorFromConfig(config config, 
log *logp.Logger) *dateCursor { func (c *dateCursor) getURL() string { if !c.enabled { - return c.url + return c.url.String() } var dateStr string @@ -60,12 +59,7 @@ func (c *dateCursor) getURL() string { dateStr = c.value } - url, err := url.Parse(c.url) - if err != nil { - return c.url - } - - q := url.Query() + q := c.url.Query() var value string if c.valueTpl == nil { @@ -73,16 +67,16 @@ func (c *dateCursor) getURL() string { } else { buf := new(bytes.Buffer) if err := c.valueTpl.Template.Execute(buf, dateStr); err != nil { - return c.url + return c.url.String() } value = buf.String() } q.Set(c.urlField, value) - url.RawQuery = q.Encode() + c.url.RawQuery = q.Encode() - return url.String() + return c.url.String() } func (c *dateCursor) advance(m common.MapStr) { diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/httpjson_test.go index 07be72f7133..7818f09997e 100644 --- a/x-pack/filebeat/input/httpjson/httpjson_test.go +++ b/x-pack/filebeat/input/httpjson/httpjson_test.go @@ -178,6 +178,7 @@ func TestHTTPJSONInput(t *testing.T) { expected: []string{ `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`, }, }, { @@ -453,6 +454,13 @@ func dateCursorHandler() http.HandlerFunc { return } _, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`)) + case 2: + if r.URL.Query().Get("$filter") != "alertCreationTime ge 2002-10-02T15:00:01Z" { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":"wrong cursor value"`)) + return + } + _, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`)) } count += 1 } diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index bdf9bb12838..edfcbb34be6 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -9,7 +9,6 @@ import ( "fmt" "net" 
"net/http" - "net/url" "time" "github.com/hashicorp/go-retryablehttp" @@ -107,23 +106,18 @@ func newHTTPJSONInput(config config) (*httpJSONInput, error) { func (*httpJSONInput) Name() string { return inputName } func (in *httpJSONInput) Test(v2.TestContext) error { - url, err := url.Parse(in.config.URL) - if err != nil { - return err - } - port := func() string { - if url.Port() != "" { - return url.Port() + if in.config.URL.Port() != "" { + return in.config.URL.Port() } - switch url.Scheme { + switch in.config.URL.Scheme { case "https": return "443" } return "80" }() - _, err = net.DialTimeout("tcp", net.JoinHostPort(url.Hostname(), port), time.Second) + _, err := net.DialTimeout("tcp", net.JoinHostPort(in.config.URL.Hostname(), port), time.Second) if err != nil { return fmt.Errorf("url %q is unreachable", in.config.URL) } From f18833d26ef8f5d7ccca3fc0588ec3cb94c28be4 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Wed, 19 Aug 2020 14:35:48 +0200 Subject: [PATCH 13/15] Change input.Run comment --- x-pack/filebeat/input/httpjson/input.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index edfcbb34be6..40c153b41a1 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -125,8 +125,8 @@ func (in *httpJSONInput) Test(v2.TestContext) error { return nil } -// Run starts the input worker then returns. Only the first invocation -// will ever start the worker. +// Run starts the input and blocks until it ends the execution. +// It will return on context cancellation or irrecoverable errors. 
func (in *httpJSONInput) Run(ctx v2.Context, publisher stateless.Publisher) error { log := ctx.Logger.With("url", in.config.URL) From e5d5be5e0d8cc451927f8fde880162f5849763a8 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Wed, 19 Aug 2020 16:36:47 +0200 Subject: [PATCH 14/15] Change input.Run to only return on context cancellation --- .../filebeat/input/httpjson/httpjson_test.go | 54 ------------------- x-pack/filebeat/input/httpjson/input.go | 27 ++++------ 2 files changed, 9 insertions(+), 72 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/httpjson_test.go b/x-pack/filebeat/input/httpjson/httpjson_test.go index 7818f09997e..b541c16002e 100644 --- a/x-pack/filebeat/input/httpjson/httpjson_test.go +++ b/x-pack/filebeat/input/httpjson/httpjson_test.go @@ -265,47 +265,6 @@ func TestHTTPJSONInput(t *testing.T) { } } -func TestLoopBreaksOnIrrecoverableFailure(t *testing.T) { - baseConfig := map[string]interface{}{ - "http_method": "GET", - "interval": "100ms", - "retry.max_attempts": 1, - } - - expected := `{"hello":"world"}` - - setupServer := newTestServer(httptest.NewServer) - - setupServer(t, failAfterFirstAttemptHandler(), baseConfig) - - cfg := common.MustNewConfigFrom(baseConfig) - - input, err := configure(cfg) - - assert.NoError(t, err) - assert.Equal(t, "httpjson", input.Name()) - assert.NoError(t, input.Test(v2.TestContext{})) - - pub := beattest.NewChanClient(len(expected)) - t.Cleanup(func() { _ = pub.Close() }) - - ctx, cancel := newV2Context() - t.Cleanup(cancel) - - if err := input.Run(ctx, pub); assert.Error(t, err) { - // retryable client errors use dynamic method / host / port in the message - // and no custom type. 
There is no other easy way to test for a specific one - assert.Contains(t, err.Error(), "giving up after 2 attempts") - } - - if assert.Equal(t, 1, len(pub.Channel)) { - got := pub.ReceiveEvent() - val, err := got.Fields.GetValue("message") - assert.NoError(t, err) - assert.JSONEq(t, expected, val.(string)) - } -} - func newTestServer( newServer func(http.Handler) *httptest.Server, ) func(*testing.T, http.HandlerFunc, map[string]interface{}) { @@ -376,19 +335,6 @@ func retryHandler() http.HandlerFunc { } } -func failAfterFirstAttemptHandler() http.HandlerFunc { - count := 0 - return func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("content-type", "application/json") - if count == 1 { - w.WriteHeader(rand.Intn(100) + 500) - return - } - _, _ = w.Write([]byte(`{"hello":"world"}`)) - count += 1 - } -} - func oauth2TokenHandler(w http.ResponseWriter, r *http.Request) { w.Header().Set("content-type", "application/json") _ = r.ParseForm() diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index 40c153b41a1..766fa364864 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -12,7 +12,6 @@ import ( "time" "github.com/hashicorp/go-retryablehttp" - "github.com/pkg/errors" "go.uber.org/zap" v2 "github.com/elastic/beats/v7/filebeat/input/v2" @@ -126,7 +125,7 @@ func (in *httpJSONInput) Test(v2.TestContext) error { } // Run starts the input and blocks until it ends the execution. -// It will return on context cancellation or irrecoverable errors. +// It will return on context cancellation, any other error will be retried. 
func (in *httpJSONInput) Run(ctx v2.Context, publisher stateless.Publisher) error { log := ctx.Logger.With("url", in.config.URL) @@ -152,30 +151,22 @@ func (in *httpJSONInput) Run(ctx v2.Context, publisher stateless.Publisher) erro log, ) - if err := requester.processHTTPRequest(stdCtx, publisher); err != nil { - return err - } - + // TODO: disallow passing interval = 0 as a mean to run once. if in.config.Interval == 0 { - return nil + return requester.processHTTPRequest(stdCtx, publisher) } err = timed.Periodic(stdCtx, in.config.Interval, func() error { log.Info("Process another repeated request.") - return requester.processHTTPRequest(stdCtx, publisher) - }) - - if err == nil { + if err := requester.processHTTPRequest(stdCtx, publisher); err != nil { + log.Error(err) + } return nil - } + }) - switch errors.Cause(err) { - case context.Canceled, context.DeadlineExceeded: - log.Infof("Context done: %v", err) - return nil - } + log.Infof("Context done: %v", err) - return err + return nil } func (in *httpJSONInput) newHTTPClient(ctx context.Context) (*http.Client, error) { From dbe98a1d7faaf441d95e6c26309650503fa4152a Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Wed, 19 Aug 2020 16:37:55 +0200 Subject: [PATCH 15/15] Remove all usages of pkg/errors --- x-pack/filebeat/input/httpjson/config.go | 14 +++++++------- .../filebeat/input/httpjson/config_oauth.go | 2 +- x-pack/filebeat/input/httpjson/config_test.go | 2 +- x-pack/filebeat/input/httpjson/pagination.go | 13 ++++++------- .../filebeat/input/httpjson/rate_limiter.go | 15 +++++++-------- x-pack/filebeat/input/httpjson/requester.go | 19 +++++++++---------- 6 files changed, 31 insertions(+), 34 deletions(-) diff --git a/x-pack/filebeat/input/httpjson/config.go b/x-pack/filebeat/input/httpjson/config.go index e384071e3a8..95ca205be0d 100644 --- a/x-pack/filebeat/input/httpjson/config.go +++ b/x-pack/filebeat/input/httpjson/config.go @@ -5,14 +5,14 @@ package httpjson import ( + "errors" + "fmt" "net/url" "regexp" 
"strings" "text/template" "time" - "github.com/pkg/errors" - "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/common/transport/tlscommon" ) @@ -137,26 +137,26 @@ func (c *config) Validate() error { case "GET", "POST": break default: - return errors.Errorf("httpjson input: Invalid http_method, %s", c.HTTPMethod) + return fmt.Errorf("httpjson input: Invalid http_method, %s", c.HTTPMethod) } if c.NoHTTPBody { if len(c.HTTPRequestBody) > 0 { - return errors.Errorf("invalid configuration: both no_http_body and http_request_body cannot be set simultaneously") + return errors.New("invalid configuration: both no_http_body and http_request_body cannot be set simultaneously") } if c.Pagination != nil && (len(c.Pagination.ExtraBodyContent) > 0 || c.Pagination.RequestField != "") { - return errors.Errorf("invalid configuration: both no_http_body and pagination.extra_body_content or pagination.req_field cannot be set simultaneously") + return errors.New("invalid configuration: both no_http_body and pagination.extra_body_content or pagination.req_field cannot be set simultaneously") } } if c.Pagination != nil { if c.Pagination.Header != nil { if c.Pagination.RequestField != "" || c.Pagination.IDField != "" || len(c.Pagination.ExtraBodyContent) > 0 { - return errors.Errorf("invalid configuration: both pagination.header and pagination.req_field or pagination.id_field or pagination.extra_body_content cannot be set simultaneously") + return errors.New("invalid configuration: both pagination.header and pagination.req_field or pagination.id_field or pagination.extra_body_content cannot be set simultaneously") } } } if c.OAuth2.IsEnabled() { if c.APIKey != "" || c.AuthenticationScheme != "" { - return errors.Errorf("invalid configuration: oauth2 and api_key or authentication_scheme cannot be set simultaneously") + return errors.New("invalid configuration: oauth2 and api_key or authentication_scheme cannot be set simultaneously") } } return nil diff 
--git a/x-pack/filebeat/input/httpjson/config_oauth.go b/x-pack/filebeat/input/httpjson/config_oauth.go index 6a09cf2fb92..0ff55dcbc33 100644 --- a/x-pack/filebeat/input/httpjson/config_oauth.go +++ b/x-pack/filebeat/input/httpjson/config_oauth.go @@ -7,13 +7,13 @@ package httpjson import ( "context" "encoding/json" + "errors" "fmt" "io/ioutil" "net/http" "os" "strings" - "github.com/pkg/errors" "golang.org/x/oauth2" "golang.org/x/oauth2/clientcredentials" "golang.org/x/oauth2/endpoints" diff --git a/x-pack/filebeat/input/httpjson/config_test.go b/x-pack/filebeat/input/httpjson/config_test.go index 148f136f08d..0de07311239 100644 --- a/x-pack/filebeat/input/httpjson/config_test.go +++ b/x-pack/filebeat/input/httpjson/config_test.go @@ -6,11 +6,11 @@ package httpjson import ( "context" + "errors" "os" "testing" "time" - "github.com/pkg/errors" "github.com/stretchr/testify/assert" "golang.org/x/oauth2/google" diff --git a/x-pack/filebeat/input/httpjson/pagination.go b/x-pack/filebeat/input/httpjson/pagination.go index 330528a82df..9a7bf82b2b4 100644 --- a/x-pack/filebeat/input/httpjson/pagination.go +++ b/x-pack/filebeat/input/httpjson/pagination.go @@ -5,13 +5,12 @@ package httpjson import ( + "errors" "fmt" "net/http" "net/url" "regexp" - "github.com/pkg/errors" - "github.com/elastic/beats/v7/libbeat/common" ) @@ -48,7 +47,7 @@ func (p *pagination) nextRequestInfo(ri *requestInfo, response response, lastObj // Pagination control using HTTP Body fields if err = p.setRequestInfoFromBody(response.body, lastObj, ri); err != nil { // if the field is not found, there is no next page - if errors.Cause(err) == common.ErrKeyNotFound { + if errors.Is(err, common.ErrKeyNotFound) { return ri, false, nil } return ri, false, err @@ -60,7 +59,7 @@ func (p *pagination) nextRequestInfo(ri *requestInfo, response response, lastObj // Pagination control using HTTP Header url, err := getNextLinkFromHeader(response.header, p.header.FieldName, p.header.RegexPattern) if err != nil { - 
return ri, false, errors.Wrapf(err, "failed to retrieve the next URL for pagination") + return ri, false, fmt.Errorf("failed to retrieve the next URL for pagination: %w", err) } if ri.url == url || url == "" { return ri, false, nil @@ -75,7 +74,7 @@ func (p *pagination) nextRequestInfo(ri *requestInfo, response response, lastObj func getNextLinkFromHeader(header http.Header, fieldName string, re *regexp.Regexp) (string, error) { links, ok := header[fieldName] if !ok { - return "", errors.Errorf("field %s does not exist in the HTTP Header", fieldName) + return "", fmt.Errorf("field %s does not exist in the HTTP Header", fieldName) } for _, link := range links { matchArray := re.FindAllStringSubmatch(link, -1) @@ -95,11 +94,11 @@ func (p *pagination) setRequestInfoFromBody(response, last common.MapStr, ri *re } if err != nil { - return errors.Wrapf(err, "failed to retrieve id_field for pagination") + return fmt.Errorf("failed to retrieve id_field for pagination: %w", err) } if p.requestField != "" { - ri.contentMap.Put(p.requestField, v) + _, _ = ri.contentMap.Put(p.requestField, v) if p.url != "" { ri.url = p.url } diff --git a/x-pack/filebeat/input/httpjson/rate_limiter.go b/x-pack/filebeat/input/httpjson/rate_limiter.go index 95cc969e1d5..57d206224ac 100644 --- a/x-pack/filebeat/input/httpjson/rate_limiter.go +++ b/x-pack/filebeat/input/httpjson/rate_limiter.go @@ -6,12 +6,11 @@ package httpjson import ( "context" + "fmt" "net/http" "strconv" "time" - "github.com/pkg/errors" - "github.com/elastic/beats/v7/libbeat/logp" ) @@ -45,7 +44,7 @@ func (r *rateLimiter) execute(ctx context.Context, f func(context.Context) (*htt header := resp.Header if err != nil { - return nil, errors.Wrapf(err, "failed to read http.response.body") + return nil, fmt.Errorf("failed to read http.response.body: %w", err) } if r == nil || resp.StatusCode == http.StatusOK { @@ -53,7 +52,7 @@ func (r *rateLimiter) execute(ctx context.Context, f func(context.Context) (*htt } if resp.StatusCode != 
http.StatusTooManyRequests { - return nil, errors.Errorf("http request was unsuccessful with a status code %d", resp.StatusCode) + return nil, fmt.Errorf("http request was unsuccessful with a status code %d", resp.StatusCode) } if err := r.applyRateLimit(ctx, header); err != nil { @@ -104,11 +103,11 @@ func (r *rateLimiter) getRateLimit(header http.Header) (int64, error) { remaining := header.Get(r.remaining) if remaining == "" { - return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", r.remaining) + return 0, fmt.Errorf("field %s does not exist in the HTTP Header, or is empty", r.remaining) } m, err := strconv.ParseInt(remaining, 10, 64) if err != nil { - return 0, errors.Wrapf(err, "failed to parse rate-limit remaining value") + return 0, fmt.Errorf("failed to parse rate-limit remaining value: %w", err) } if m != 0 { @@ -117,11 +116,11 @@ func (r *rateLimiter) getRateLimit(header http.Header) (int64, error) { reset := header.Get(r.reset) if reset == "" { - return 0, errors.Errorf("field %s does not exist in the HTTP Header, or is empty", r.reset) + return 0, fmt.Errorf("field %s does not exist in the HTTP Header, or is empty", r.reset) } epoch, err := strconv.ParseInt(reset, 10, 64) if err != nil { - return 0, errors.Wrapf(err, "failed to parse rate-limit reset value") + return 0, fmt.Errorf("failed to parse rate-limit reset value: %w", err) } if time.Unix(epoch, 0).Sub(time.Now()) <= 0 { return 0, nil diff --git a/x-pack/filebeat/input/httpjson/requester.go b/x-pack/filebeat/input/httpjson/requester.go index af169cfe137..579e5e26756 100644 --- a/x-pack/filebeat/input/httpjson/requester.go +++ b/x-pack/filebeat/input/httpjson/requester.go @@ -8,12 +8,11 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" "io/ioutil" "net/http" - "github.com/pkg/errors" - stateless "github.com/elastic/beats/v7/filebeat/input/v2/input-stateless" "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/logp" @@ -98,11 
+97,11 @@ func (r *requester) processHTTPRequest(ctx context.Context, publisher stateless. func(ctx context.Context) (*http.Response, error) { req, err := r.createHTTPRequest(ctx, ri) if err != nil { - return nil, errors.Wrapf(err, "failed to create http request") + return nil, fmt.Errorf("failed to create http request: %w", err) } msg, err := r.client.Do(req) if err != nil { - return nil, errors.Wrapf(err, "failed to execute http client.Do") + return nil, fmt.Errorf("failed to execute http client.Do: %w", err) } return msg, nil }, @@ -113,13 +112,13 @@ func (r *requester) processHTTPRequest(ctx context.Context, publisher stateless. responseData, err := ioutil.ReadAll(resp.Body) if err != nil { - return errors.Wrapf(err, "failed to read http response") + return fmt.Errorf("failed to read http response: %w", err) } _ = resp.Body.Close() if err = json.Unmarshal(responseData, &m); err != nil { r.log.Debug("failed to unmarshal http.response.body", string(responseData)) - return errors.Wrapf(err, "failed to unmarshal http.response.body %q", string(responseData)) + return fmt.Errorf("failed to unmarshal http.response.body: %w", err) } switch obj := m.(type) { @@ -151,12 +150,12 @@ func (r *requester) processHTTPRequest(ctx context.Context, publisher stateless. 
return err } default: - return errors.Errorf("content of %s is not a valid array", r.jsonObjects) + return fmt.Errorf("content of %s is not a valid array", r.jsonObjects) } } default: r.log.Debug("http.response.body is not a valid JSON object", string(responseData)) - return errors.Errorf("http.response.body is not a valid JSON object, but a %T", obj) + return fmt.Errorf("http.response.body is not a valid JSON object, but a %T", obj) } ri, hasNext, err = r.pagination.nextRequestInfo(ri, response, lastObj) @@ -219,12 +218,12 @@ func (r *requester) processEventArray(publisher stateless.Publisher, events []in last = e d, err := json.Marshal(e) if err != nil { - return nil, errors.Wrapf(err, "failed to marshal %+v", e) + return nil, fmt.Errorf("failed to marshal %+v: %w", e, err) } publisher.Publish(makeEvent(string(d))) } default: - return nil, errors.Errorf("expected only JSON objects in the array but got a %T", v) + return nil, fmt.Errorf("expected only JSON objects in the array but got a %T", v) } } return last, nil