From 68df8225ab021736e090a9c210082861b4548aee Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 27 Apr 2023 17:26:51 +0200 Subject: [PATCH 01/46] Fix input reload under Elastic-Agent This commit fixes the input reload issue under Elastic-Agent by creating an infinity retry logic in the ManagerV2. This is exactly the same logic used by the configuration reload on a standalone Beat. Now if when reloading inputs, there is at least one `common.ErrInputNotFinished` a `forceReload` flag is set to true and the debounce timer is started. This process will repeat untill no `common.ErrInputNotFinished` is returned. The `changeDebounce` period is increased to 1s and the `forceReloadDebounce` period is set to `10 x changeDebounce`. --- filebeat/input/log/input.go | 5 +- libbeat/common/errors.go | 1 + x-pack/libbeat/management/managerV2.go | 73 +++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/filebeat/input/log/input.go b/filebeat/input/log/input.go index eeaa2de0c915..a3ca59d55da2 100644 --- a/filebeat/input/log/input.go +++ b/filebeat/input/log/input.go @@ -192,7 +192,10 @@ func (p *Input) loadStates(states []file.State) error { // In case a input is tried to be started with an unfinished state matching the glob pattern if !state.Finished { - return &common.ErrInputNotFinished{State: state.String()} + return &common.ErrInputNotFinished{ + State: state.String(), + File: state.Fileinfo.Name(), + } } // Convert state to current identifier if different diff --git a/libbeat/common/errors.go b/libbeat/common/errors.go index 68fecb8f5504..9f5248e815ed 100644 --- a/libbeat/common/errors.go +++ b/libbeat/common/errors.go @@ -24,6 +24,7 @@ import ( // ErrInputNotFinished struct for reporting errors related to not finished inputs type ErrInputNotFinished struct { State string + File string } // Error method of ErrInputNotFinished diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 922170177d71..04f688c925c3 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -6,18 +6,22 @@ package management import ( "context" + "errors" "fmt" "os" "os/signal" "sync" + "sync/atomic" "syscall" "time" "github.com/joeshaw/multierror" + pkgerr "github.com/pkg/errors" "go.uber.org/zap/zapcore" gproto "google.golang.org/protobuf/proto" "gopkg.in/yaml.v2" + "github.com/elastic/beats/v7/libbeat/common" "github.com/elastic/beats/v7/libbeat/features" "github.com/elastic/elastic-agent-client/v7/pkg/client" "github.com/elastic/elastic-agent-client/v7/pkg/proto" @@ -49,9 +53,11 @@ type BeatV2Manager struct { errCanceller context.CancelFunc // track individual units given to us by the V2 API - mx sync.Mutex - units map[unitKey]*client.Unit - actions []client.Action + mx sync.Mutex + units map[unitKey]*client.Unit + actions []client.Action + forceReload atomic.Bool + reloadLock sync.Mutex // status is reported as a whole for every unit sent to this component // hopefully this can be improved in the future to be separated per unit @@ -378,7 +384,13 @@ func (cm *BeatV2Manager) watchErrChan(ctx context.Context) { } func (cm *BeatV2Manager) unitListen() { - const changeDebounce = 100 * time.Millisecond + const changeDebounce = time.Second + + // forceReloadDebounce is greater than changeDebounce because it is only + // used when an input has not reached its finished state, this means some events + // still need to be acked by the acker, hence the longer we wait the more likely + // for the input to have reached its finished state. + const forceReloadDebounce = changeDebounce * 10 // register signal handler sigc := make(chan os.Signal, 1) @@ -439,6 +451,10 @@ func (cm *BeatV2Manager) unitListen() { } cm.mx.Unlock() cm.reload(units) + if cm.forceReload.Load() { + // Restart the debounce timer so we try to reload the inputs. + t.Reset(forceReloadDebounce) + } } } } @@ -618,6 +634,8 @@ func (cm *BeatV2Manager) reloadOutput(unit *client.Unit) error { } func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { + cm.reloadLock.Lock() + defer cm.reloadLock.Unlock() obj := cm.registry.GetInputList() if obj == nil { return fmt.Errorf("failed to find beat reloadable type 'input'") @@ -642,15 +660,54 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { inputBeatCfgs = append(inputBeatCfgs, inputCfg...) } - if !didChange(cm.lastInputCfgs, inputCfgs) { + if !didChange(cm.lastInputCfgs, inputCfgs) && !cm.forceReload.Load() { cm.logger.Debug("Skipped reloading input units; configuration didn't change") return nil } - err := obj.Reload(inputBeatCfgs) - if err != nil { - return fmt.Errorf("failed to reloading inputs: %w", err) + if cm.forceReload.Load() { + cm.logger.Debug("Reloading Beats inputs because forceReload is true") + } + + if err := obj.Reload(inputBeatCfgs); err != nil { + merror := &multierror.MultiError{} + realErrors := multierror.Errors{} + + // At the moment this logic is tightly bound to the current RunnerList + // implementation from libbeat/cfgfile/list.go and Input.loadStates from + // filebeat/input/log/input.go. + // If they change the way they report errors, this will break. + // TODO (Tiago): update all layers to use the most recent features from + // the standard library errors package. + if errors.As(err, &merror) { + for _, err := range merror.Errors { + cause := pkgerr.Cause(err) + // A Log input is only marked as finished when all events it + // produceds are acked by the acker so when we see this error, + // we just retry until the new input can be started. + // This is the same logic used by the standalone configuration file + // reloader implemented on libbeat/cfgfile/reload.go + if err, ok := cause.(*common.ErrInputNotFinished); ok { + cm.logger.Debugf("file %q is not finished, will retry starting the input later", err.File) + cm.forceReload.Store(true) + cm.logger.Debug("ForceReload set to TRUE") + continue + } + + // This is an error that cannot be ignored, so we report it + realErrors = append(realErrors, err) + } + } + + if len(realErrors) != 0 { + return fmt.Errorf("failed to reload inputs: %w", realErrors.Err()) + } + } else { + // no issues while reloading inputs, set forceReload to false + cm.forceReload.Store(false) + cm.logger.Info("ForceReload set to FALSE") } + cm.lastInputCfgs = inputCfgs cm.lastBeatInputCfgs = inputBeatCfgs return nil From 7fce9001d001407ec270a4fb0821f93d15ae3d77 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 28 Apr 2023 12:52:52 +0200 Subject: [PATCH 02/46] PR improvements Use the `errors` package API instead of type conversion. --- x-pack/libbeat/management/managerV2.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 04f688c925c3..66c62f963816 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -687,8 +687,9 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { // we just retry until the new input can be started. // This is the same logic used by the standalone configuration file // reloader implemented on libbeat/cfgfile/reload.go - if err, ok := cause.(*common.ErrInputNotFinished); ok { - cm.logger.Debugf("file %q is not finished, will retry starting the input later", err.File) + inputNotFinishedErr := &common.ErrInputNotFinished{} + if ok := errors.As(cause, &inputNotFinishedErr); ok { + cm.logger.Debugf("file %q is not finished, will retry starting the input in %s", inputNotFinishedErr.File) cm.forceReload.Store(true) cm.logger.Debug("ForceReload set to TRUE") continue From 4d0732bedef00214e4035142d2efce42543606a9 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 28 Apr 2023 13:04:52 +0200 Subject: [PATCH 03/46] Address lint issues Address all lint issues --- filebeat/input/log/input.go | 24 ++++++++----------- x-pack/libbeat/management/managerV2.go | 32 +++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/filebeat/input/log/input.go b/filebeat/input/log/input.go index a3ca59d55da2..46ed594d09a7 100644 --- a/filebeat/input/log/input.go +++ b/filebeat/input/log/input.go @@ -93,7 +93,9 @@ func NewInput( cleanupNeeded := true cleanupIfNeeded := func(f func() error) { if cleanupNeeded { - f() + if err := f(); err != nil { + logp.L().Named("input.log").Errorf("clean up function retruned an error: %w", err) + } } } @@ -103,10 +105,10 @@ func NewInput( return nil, err } if err := inputConfig.resolveRecursiveGlobs(); err != nil { - return nil, fmt.Errorf("Failed to resolve recursive globs in config: %v", err) + return nil, fmt.Errorf("Failed to resolve recursive globs in config: %w", err) } if err := inputConfig.normalizeGlobPatterns(); err != nil { - return nil, fmt.Errorf("Failed to normalize globs patterns: %v", err) + return nil, fmt.Errorf("Failed to normalize globs patterns: %w", err) } if len(inputConfig.Paths) == 0 { @@ -115,7 +117,7 @@ func NewInput( identifier, err := file.NewStateIdentifier(inputConfig.FileIdentity) if err != nil { - return nil, fmt.Errorf("failed to initialize file identity generator: %+v", err) + return nil, fmt.Errorf("failed to initialize file identity generator: %w", err) } // Note: underlying output. @@ -551,7 +553,7 @@ func (p *Input) scan() { if isNewState { logger.Debugf("Start harvester for new file: %s", newState.Source) err := p.startHarvester(logger, newState, 0) - if err == errHarvesterLimit { + if errors.Is(err, errHarvesterLimit) { logger.Debugf(harvesterErrMsg, newState.Source, err) continue } @@ -676,11 +678,7 @@ func (p *Input) isIgnoreOlder(state file.State) bool { } modTime := state.Fileinfo.ModTime() - if time.Since(modTime) > p.config.IgnoreOlder { - return true - } - - return false + return time.Since(modTime) > p.config.IgnoreOlder } // isCleanInactive checks if the given state false under clean_inactive @@ -691,11 +689,7 @@ func (p *Input) isCleanInactive(state file.State) bool { } modTime := state.Fileinfo.ModTime() - if time.Since(modTime) > p.config.CleanInactive { - return true - } - - return false + return time.Since(modTime) > p.config.CleanInactive } // subOutletWrap returns a factory method that will wrap the passed outlet diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 66c62f963816..bd92b6db568e 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -16,7 +16,6 @@ import ( "time" "github.com/joeshaw/multierror" - pkgerr "github.com/pkg/errors" "go.uber.org/zap/zapcore" gproto "google.golang.org/protobuf/proto" "gopkg.in/yaml.v2" @@ -681,14 +680,14 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { // the standard library errors package. if errors.As(err, &merror) { for _, err := range merror.Errors { - cause := pkgerr.Cause(err) + causeErr := cause(err) // A Log input is only marked as finished when all events it // produceds are acked by the acker so when we see this error, // we just retry until the new input can be started. // This is the same logic used by the standalone configuration file // reloader implemented on libbeat/cfgfile/reload.go inputNotFinishedErr := &common.ErrInputNotFinished{} - if ok := errors.As(cause, &inputNotFinishedErr); ok { + if ok := errors.As(causeErr, &inputNotFinishedErr); ok { cm.logger.Debugf("file %q is not finished, will retry starting the input in %s", inputNotFinishedErr.File) cm.forceReload.Store(true) cm.logger.Debug("ForceReload set to TRUE") @@ -831,3 +830,30 @@ func didChange(previous map[string]*proto.UnitExpectedConfig, latest map[string] } return false } + +// cause returns the underlying cause of the error, if possible. +// An error value has a cause if it implements the following +// interface: +// +// type causer interface { +// Cause() error +// } +// +// If the error does not implement Cause, the original error will +// be returned. If the error is nil, nil will be returned without further +// investigation. +// Source: https://github.com/pkg/errors/blob/614d223910a179a466c1767a985424175c39b465/errors.go#L264-L288 +func cause(err error) error { + type causer interface { + Cause() error + } + + for err != nil { + cause, ok := err.(causer) + if !ok { + break + } + err = cause.Cause() + } + return err +} From 37cfef382fcf7334e54a6ae9fa0fba150cb87993 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 28 Apr 2023 19:59:13 +0200 Subject: [PATCH 04/46] Implement tests The tests implemented by this commit ensure that `ManagerV2` can recover from `common.ErrInputNotFinished` given that the underlying implementations have not changed. See the comments on the code for more details. Other E2E testes will be implemented to avoid regressions. --- .../libbeat/management/input_reload_test.go | 214 ++++++++++++++++++ x-pack/libbeat/management/managerV2.go | 2 +- 2 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 x-pack/libbeat/management/input_reload_test.go diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go new file mode 100644 index 000000000000..1ed757bd54ba --- /dev/null +++ b/x-pack/libbeat/management/input_reload_test.go @@ -0,0 +1,214 @@ +package management + +import ( + "fmt" + "sync" + "testing" + "time" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/reload" + "github.com/elastic/elastic-agent-client/v7/pkg/client" + "github.com/elastic/elastic-agent-client/v7/pkg/proto" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/joeshaw/multierror" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func TestInputReload(t *testing.T) { + logp.DevelopmentSetup(logp.WithLevel(logp.DebugLevel), logp.WithSelectors("*", "centralmgmt.V2-manager")) + r := reload.NewRegistry() + + output := &reloadable{} + r.MustRegisterOutput(output) + + reloadCallCount := 0 + inputs := &reloadableListMock{ + ReloadImpl: func(configs []*reload.ConfigWithMeta) error { + reloadCallCount++ + if reloadCallCount == 1 { + t.Log("Reload implementation retruning an error") + e1 := multierror.Errors{withCause{&common.ErrInputNotFinished{ + State: "", + File: "/tmp/foo.log", + }}} + return e1.Err() + } + t.Log("reload implementation retruning 'nil'") + + return nil + }, + } + r.MustRegisterInput(inputs) + + configIdx := -1 + + onObserved := func(observed *proto.CheckinObserved, currentIdx int) { + configIdx = currentIdx + t.Log(currentIdx, "on observed called") + for _, unit := range observed.Units { + t.Log(unit.Id, unit.Payload.String(), unit.Message, unit.GetState().String(), unit.Type.String(), unit.ConfigStateIdx) + } + } + srv := mockSrv([][]*proto.UnitExpected{ + { + { + Id: "output-unit", + Type: proto.UnitType_OUTPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "default", + Type: "elasticsearch", + Name: "elasticsearch", + }, + }, + { + Id: "input-unit-1", + Type: proto.UnitType_INPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "log-input", + Type: "log", + Name: "log", + Streams: []*proto.Stream{ + { + Id: "log-input-1", + Source: requireNewStruct(t, map[string]interface{}{ + "paths": []interface{}{"/tmp/foo.log"}, + }), + }, + }, + }, + }, + }, + { + { + Id: "output-unit", + Type: proto.UnitType_OUTPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "default", + Type: "elasticsearch", + Name: "elasticsearch", + }, + }, + { + Id: "input-unit-1", + Type: proto.UnitType_INPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "log-input-2", + Type: "log", + Name: "log", + Streams: []*proto.Stream{ + { + Id: "log-input-2", + Source: requireNewStruct(t, map[string]interface{}{ + "paths": []interface{}{"/tmp/foo.log"}, + }), + }, + }, + }, + }, + }, + }, + []uint64{1, 1}, + []*proto.Features{ + nil, + nil, + }, + onObserved, + 500*time.Millisecond, + ) + require.NoError(t, srv.Start()) + defer srv.Stop() + + client := client.NewV2(fmt.Sprintf(":%d", srv.Port), "", client.VersionInfo{ + Name: "program", + Version: "v1.0.0", + Meta: map[string]string{ + "key": "value", + }, + }, grpc.WithTransportCredentials(insecure.NewCredentials())) + + m, err := NewV2AgentManagerWithClient(&Config{ + Enabled: true, + }, r, client) + require.NoError(t, err) + + mm := m.(*BeatV2Manager) + + err = m.Start() + require.NoError(t, err) + defer m.Stop() + + forceReloadFalseCount := 0 + forceReloadState := []bool{false, true, false} + forceReloadStateIdx := 0 + forceReloadLastState := true // starts on true so the first iteratiction is already a change + + // TODO: write a state machine to check the ForceReload Flip alongside the config indes (from the observe callback) + require.Eventually(t, func() bool { + forceReload := mm.forceReload.Load() + // That detects a state change, we only count/advance steps + // on state changes + if forceReload != forceReloadLastState { + t.Log("================ Force reload state change") + forceReloadLastState = forceReload + if forceReload == forceReloadState[forceReloadStateIdx] { + // Setp to the next state + forceReloadStateIdx++ + } + + // If we went through all states, then succeed + if forceReloadStateIdx == len(forceReloadState) { + // If we went through all states + if configIdx == 1 { + return true + } + } + } + t.Log("forcereload False cont: ", forceReloadFalseCount, "config idx:", configIdx, "Force reload status:", mm.forceReload.Load(), "force reload state idx:", forceReloadStateIdx) + return configIdx == 1 && forceReloadFalseCount == 2 + }, 15*time.Hour, 300*time.Millisecond) +} + +type reloadableListMock struct { + mx sync.Mutex + configs []*reload.ConfigWithMeta + ReloadImpl func(configs []*reload.ConfigWithMeta) error +} + +func (r *reloadableListMock) Reload(configs []*reload.ConfigWithMeta) error { + r.mx.Lock() + defer r.mx.Unlock() + return r.ReloadImpl(configs) +} + +func (r *reloadableListMock) Configs() []*reload.ConfigWithMeta { + r.mx.Lock() + defer r.mx.Unlock() + return r.configs +} + +type withCause struct { + Err error +} + +func (w withCause) Cause() error { + return w.Err +} + +func (w withCause) Error() string { + return w.Err.Error() +} diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index bd92b6db568e..29a38112a9d4 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -688,7 +688,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { // reloader implemented on libbeat/cfgfile/reload.go inputNotFinishedErr := &common.ErrInputNotFinished{} if ok := errors.As(causeErr, &inputNotFinishedErr); ok { - cm.logger.Debugf("file %q is not finished, will retry starting the input in %s", inputNotFinishedErr.File) + cm.logger.Debugf("file '%s' is not finished, will retry starting the input soon", inputNotFinishedErr.File) cm.forceReload.Store(true) cm.logger.Debug("ForceReload set to TRUE") continue From 2ba009abddca8b6dd3c26c77b18cd960ee5ad770 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 2 May 2023 13:01:19 +0200 Subject: [PATCH 05/46] Remove test debug logs --- x-pack/libbeat/management/input_reload_test.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index 1ed757bd54ba..e0569a84a7f5 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -10,7 +10,6 @@ import ( "github.com/elastic/beats/v7/libbeat/common/reload" "github.com/elastic/elastic-agent-client/v7/pkg/client" "github.com/elastic/elastic-agent-client/v7/pkg/proto" - "github.com/elastic/elastic-agent-libs/logp" "github.com/joeshaw/multierror" "github.com/stretchr/testify/require" "google.golang.org/grpc" @@ -18,7 +17,8 @@ import ( ) func TestInputReload(t *testing.T) { - logp.DevelopmentSetup(logp.WithLevel(logp.DebugLevel), logp.WithSelectors("*", "centralmgmt.V2-manager")) + // Uncomment this line to see the debug logs for this test + // logp.DevelopmentSetup(logp.WithLevel(logp.DebugLevel), logp.WithSelectors("*", "centralmgmt.V2-manager")) r := reload.NewRegistry() output := &reloadable{} @@ -29,14 +29,12 @@ func TestInputReload(t *testing.T) { ReloadImpl: func(configs []*reload.ConfigWithMeta) error { reloadCallCount++ if reloadCallCount == 1 { - t.Log("Reload implementation retruning an error") e1 := multierror.Errors{withCause{&common.ErrInputNotFinished{ State: "", File: "/tmp/foo.log", }}} return e1.Err() } - t.Log("reload implementation retruning 'nil'") return nil }, @@ -44,14 +42,10 @@ func TestInputReload(t *testing.T) { r.MustRegisterInput(inputs) configIdx := -1 - onObserved := func(observed *proto.CheckinObserved, currentIdx int) { configIdx = currentIdx - t.Log(currentIdx, "on observed called") - for _, unit := range observed.Units { - t.Log(unit.Id, unit.Payload.String(), unit.Message, unit.GetState().String(), unit.Type.String(), unit.ConfigStateIdx) - } } + srv := mockSrv([][]*proto.UnitExpected{ { { @@ -163,7 +157,6 @@ func TestInputReload(t *testing.T) { // That detects a state change, we only count/advance steps // on state changes if forceReload != forceReloadLastState { - t.Log("================ Force reload state change") forceReloadLastState = forceReload if forceReload == forceReloadState[forceReloadStateIdx] { // Setp to the next state @@ -178,7 +171,7 @@ func TestInputReload(t *testing.T) { } } } - t.Log("forcereload False cont: ", forceReloadFalseCount, "config idx:", configIdx, "Force reload status:", mm.forceReload.Load(), "force reload state idx:", forceReloadStateIdx) + return configIdx == 1 && forceReloadFalseCount == 2 }, 15*time.Hour, 300*time.Millisecond) } From 66176c86e4e3f944e3b87c1f540845be590a1473 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 2 May 2023 14:14:15 +0200 Subject: [PATCH 06/46] fmt and add lisence headers --- x-pack/libbeat/management/input_reload_test.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index e0569a84a7f5..ad83298e83e5 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package management import ( @@ -6,14 +10,15 @@ import ( "testing" "time" - "github.com/elastic/beats/v7/libbeat/common" - "github.com/elastic/beats/v7/libbeat/common/reload" - "github.com/elastic/elastic-agent-client/v7/pkg/client" - "github.com/elastic/elastic-agent-client/v7/pkg/proto" "github.com/joeshaw/multierror" "github.com/stretchr/testify/require" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/reload" + "github.com/elastic/elastic-agent-client/v7/pkg/client" + "github.com/elastic/elastic-agent-client/v7/pkg/proto" ) func TestInputReload(t *testing.T) { From b31bd28fb2d6aea3de409883b25f584c9df4281a Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 2 May 2023 16:10:55 +0200 Subject: [PATCH 07/46] make lint happy again --- filebeat/input/log/input.go | 6 +++--- x-pack/libbeat/management/managerV2.go | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/filebeat/input/log/input.go b/filebeat/input/log/input.go index 46ed594d09a7..b51f644af003 100644 --- a/filebeat/input/log/input.go +++ b/filebeat/input/log/input.go @@ -471,7 +471,7 @@ func getFileState(path string, info os.FileInfo, p *Input) (file.State, error) { var absolutePath string absolutePath, err = filepath.Abs(path) if err != nil { - return file.State{}, fmt.Errorf("could not fetch abs path for file %s: %s", absolutePath, err) + return file.State{}, fmt.Errorf("could not fetch abs path for file %s: %w", absolutePath, err) } p.logger.Debugf("Check file for harvesting: %s", absolutePath) // Create new state for comparison @@ -745,7 +745,7 @@ func (p *Input) startHarvester(logger *logp.Logger, state file.State, offset int err = h.Setup() if err != nil { p.numHarvesters.Dec() - return fmt.Errorf("error setting up harvester: %s", err) + return fmt.Errorf("error setting up harvester: %w", err) } // Update state before staring harvester @@ -780,7 +780,7 @@ func (p *Input) updateState(state file.State) error { stateToRemove := file.State{Id: state.PrevId, TTL: 0, Finished: true, Meta: nil} err := p.doUpdate(stateToRemove) if err != nil { - return fmt.Errorf("failed to remove outdated states based on prev_id: %v", err) + return fmt.Errorf("failed to remove outdated states based on prev_id: %w", err) } } diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 29a38112a9d4..b11cf7bae60b 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -849,6 +849,8 @@ func cause(err error) error { } for err != nil { + //nolint: errorlint // This is the original github.com/pkg/errors + // implementation. cause, ok := err.(causer) if !ok { break From c902ab82733eb3fbbe2466f0108246f4aaa60300 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 2 May 2023 16:21:59 +0200 Subject: [PATCH 08/46] Add changelog --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index d7d4137bb2fc..6f53e00913e3 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -117,6 +117,7 @@ https://github.com/elastic/beats/compare/v8.2.0\...main[Check the HEAD diff] - Sanitize filenames for request tracer in cel input. {pull}35154[35154] - Fix accidental error overwrite in defer statement in entityanalytics Azure AD input. {issue}35153[35153] {pull}35169[35169] - Fixing the grok expression outputs of log files {pull}35221[35221] +- Fixes "Can only start an input when all related states are finished" when running under Elastic-Agent {pull}35250[35250] {issue}33653[33653] *Heartbeat* From dd0a47e7415378c5ffac604cb4846ba693805a81 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 3 May 2023 10:15:31 +0200 Subject: [PATCH 09/46] improve log entries --- x-pack/libbeat/management/managerV2.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index b11cf7bae60b..0a8ad3ab1538 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -665,7 +665,9 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { } if cm.forceReload.Load() { - cm.logger.Debug("Reloading Beats inputs because forceReload is true") + cm.logger.Info("Reloading Beats inputs because forceReload is true. " + + "Set log level to debug to get more information about which " + + "inputs are causing this.") } if err := obj.Reload(inputBeatCfgs); err != nil { @@ -705,7 +707,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { } else { // no issues while reloading inputs, set forceReload to false cm.forceReload.Store(false) - cm.logger.Info("ForceReload set to FALSE") + cm.logger.Debug("ForceReload set to FALSE") } cm.lastInputCfgs = inputCfgs From 0b36d196a4afcce7b0d3bef63eea1b3b7d962b3e Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 3 May 2023 12:42:10 +0200 Subject: [PATCH 10/46] Clean up tests --- x-pack/libbeat/management/input_reload_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index ad83298e83e5..40d80c004658 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -151,13 +151,11 @@ func TestInputReload(t *testing.T) { require.NoError(t, err) defer m.Stop() - forceReloadFalseCount := 0 forceReloadState := []bool{false, true, false} forceReloadStateIdx := 0 forceReloadLastState := true // starts on true so the first iteratiction is already a change - // TODO: write a state machine to check the ForceReload Flip alongside the config indes (from the observe callback) - require.Eventually(t, func() bool { + eventuallyCheck := func() bool { forceReload := mm.forceReload.Load() // That detects a state change, we only count/advance steps // on state changes @@ -177,8 +175,11 @@ func TestInputReload(t *testing.T) { } } - return configIdx == 1 && forceReloadFalseCount == 2 - }, 15*time.Hour, 300*time.Millisecond) + return false + } + + require.Eventually(t, eventuallyCheck, 20*time.Second, 300*time.Millisecond, + "the expected changes on forceReload did not happen") } type reloadableListMock struct { From 71bd239548c9761c18e0085248c7e13b728ff05c Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 5 May 2023 17:07:01 +0200 Subject: [PATCH 11/46] PR improvements --- x-pack/libbeat/management/input_reload_test.go | 2 +- x-pack/libbeat/management/managerV2.go | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index 40d80c004658..64a2eeac0c95 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -156,7 +156,7 @@ func TestInputReload(t *testing.T) { forceReloadLastState := true // starts on true so the first iteratiction is already a change eventuallyCheck := func() bool { - forceReload := mm.forceReload.Load() + forceReload := mm.forceReload // That detects a state change, we only count/advance steps // on state changes if forceReload != forceReloadLastState { diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 0a8ad3ab1538..d2131de61bb7 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -11,7 +11,6 @@ import ( "os" "os/signal" "sync" - "sync/atomic" "syscall" "time" @@ -55,8 +54,7 @@ type BeatV2Manager struct { mx sync.Mutex units map[unitKey]*client.Unit actions []client.Action - forceReload atomic.Bool - reloadLock sync.Mutex + forceReload bool // status is reported as a whole for every unit sent to this component // hopefully this can be improved in the future to be separated per unit @@ -450,7 +448,7 @@ func (cm *BeatV2Manager) unitListen() { } cm.mx.Unlock() cm.reload(units) - if cm.forceReload.Load() { + if cm.forceReload { // Restart the debounce timer so we try to reload the inputs. t.Reset(forceReloadDebounce) } @@ -633,8 +631,6 @@ func (cm *BeatV2Manager) reloadOutput(unit *client.Unit) error { } func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { - cm.reloadLock.Lock() - defer cm.reloadLock.Unlock() obj := cm.registry.GetInputList() if obj == nil { return fmt.Errorf("failed to find beat reloadable type 'input'") @@ -659,12 +655,12 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { inputBeatCfgs = append(inputBeatCfgs, inputCfg...) } - if !didChange(cm.lastInputCfgs, inputCfgs) && !cm.forceReload.Load() { + if !didChange(cm.lastInputCfgs, inputCfgs) && !cm.forceReload { cm.logger.Debug("Skipped reloading input units; configuration didn't change") return nil } - if cm.forceReload.Load() { + if cm.forceReload { cm.logger.Info("Reloading Beats inputs because forceReload is true. " + "Set log level to debug to get more information about which " + "inputs are causing this.") @@ -691,7 +687,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { inputNotFinishedErr := &common.ErrInputNotFinished{} if ok := errors.As(causeErr, &inputNotFinishedErr); ok { cm.logger.Debugf("file '%s' is not finished, will retry starting the input soon", inputNotFinishedErr.File) - cm.forceReload.Store(true) + cm.forceReload = true cm.logger.Debug("ForceReload set to TRUE") continue } @@ -706,7 +702,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { } } else { // no issues while reloading inputs, set forceReload to false - cm.forceReload.Store(false) + cm.forceReload = false cm.logger.Debug("ForceReload set to FALSE") } From 4d2e32dbe96c6849af4ad7aafcb189d7002920d0 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 5 May 2023 12:32:17 +0200 Subject: [PATCH 12/46] [WIP] Integration tests --- x-pack/libbeat/management/config.go | 16 +- x-pack/libbeat/management/framework_test.go | 91 ++++++++++ x-pack/libbeat/management/integ_test.go | 181 ++++++++++++++++++++ x-pack/libbeat/management/managerV2.go | 41 +++-- 4 files changed, 315 insertions(+), 14 deletions(-) create mode 100644 x-pack/libbeat/management/framework_test.go create mode 100644 x-pack/libbeat/management/integ_test.go diff --git a/x-pack/libbeat/management/config.go b/x-pack/libbeat/management/config.go index ca5bc72b79a4..82986031465f 100644 --- a/x-pack/libbeat/management/config.go +++ b/x-pack/libbeat/management/config.go @@ -5,15 +5,18 @@ package management import ( + "errors" + "github.com/elastic/beats/v7/libbeat/common/reload" conf "github.com/elastic/elastic-agent-libs/config" ) // Config for central management type Config struct { - Enabled bool `config:"enabled" yaml:"enabled"` - Blacklist ConfigBlacklistSettings `config:"blacklist" yaml:"blacklist"` - RestartOnOutputChange bool `config:"restart_on_output_change" yaml:"restart_on_output_change"` + Enabled bool `config:"enabled" yaml:"enabled"` + Blacklist ConfigBlacklistSettings `config:"blacklist" yaml:"blacklist"` + RestartOnOutputChange bool `config:"restart_on_output_change" yaml:"restart_on_output_change"` + InsecureGRPCURLForTesting string `config:"insecure_grpc_url_for_testing" yaml:"insecure_grpc_url_for_testing"` } // ConfigBlock stores a piece of config from central management @@ -56,3 +59,10 @@ func (c *ConfigBlock) ConfigWithMeta() (*reload.ConfigWithMeta, error) { Config: config, }, nil } + +func (c Config) Validate() error { + if !c.Enabled && c.InsecureGRPCURLForTesting != "" { + return errors.New("'management.insecure_grpc_url_for_testing' can only be used if 'management.enabled' is set.") + } + return nil +} diff --git a/x-pack/libbeat/management/framework_test.go b/x-pack/libbeat/management/framework_test.go new file mode 100644 index 000000000000..ade2225d226a --- /dev/null +++ b/x-pack/libbeat/management/framework_test.go @@ -0,0 +1,91 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package management + +import ( + "bufio" + "fmt" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" +) + +type Proc struct { + Binary string + Args []string + Cmd *exec.Cmd + t *testing.T + stdout *bufio.Scanner +} + +func NewProc(t *testing.T, binary string, args []string, port int) Proc { + p := Proc{ + t: t, + Binary: binary, + Args: append([]string{ + "--systemTest", + "-e", + "-d", + // "*", + "centralmgmt, centralmgmt.V2-manager", + "-E", + fmt.Sprintf("management.insecure_grpc_url_for_testing=\"localhost:%d\"", port), + "-E", + "management.enabled=true", + }, args...), + } + return p +} + +func (p *Proc) Start() { + fullPath, err := filepath.Abs(p.Binary) + if err != nil { + p.t.Fatalf("could got get full path from %q, err: %s", p.Binary, err) + } + p.Cmd = exec.Command(fullPath, p.Args...) + stdout, err := p.Cmd.StderrPipe() + if err != nil { + p.t.Fatalf("could not get stdout pipe for process, err: %s", err) + } + p.stdout = bufio.NewScanner(stdout) + + if err := p.Cmd.Start(); err != nil { + p.t.Fatalf("could not start process: %s", err) + } + p.t.Cleanup(func() { + pid := p.Cmd.Process.Pid + if err := p.Cmd.Process.Kill(); err != nil { + p.t.Fatalf("could not stop process with PID: %d, err: %s", pid, err) + } + }) +} + +func (p *Proc) LogContains(s string, timeout time.Duration) bool { + p.t.Log("LogContans called") + defer p.t.Log("LogContans done") + + ticker := time.NewTicker(10 * time.Millisecond) + defer ticker.Stop() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + for { + select { + default: + if p.stdout.Scan() { + line := p.stdout.Text() + if strings.Contains(line, s) { + fmt.Println(line) + return true + } + } + case <-timer.C: + p.t.Fatal("timeout") + } + } +} diff --git a/x-pack/libbeat/management/integ_test.go b/x-pack/libbeat/management/integ_test.go new file mode 100644 index 000000000000..6b26cca9c67c --- /dev/null +++ b/x-pack/libbeat/management/integ_test.go @@ -0,0 +1,181 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package management + +import ( + "testing" + "time" + + "github.com/elastic/elastic-agent-client/v7/pkg/client/mock" + "github.com/elastic/elastic-agent-client/v7/pkg/proto" + "github.com/stretchr/testify/require" +) + +func TestPureServe(t *testing.T) { + var units = [][]*proto.UnitExpected{ + { + { + Id: "output-unit", + Type: proto.UnitType_OUTPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "default", + Type: "elasticsearch", + Name: "elasticsearch", + Source: requireNewStruct(t, + map[string]interface{}{ + "type": "elasticsearch", + "hosts": []interface{}{"http://localhost:9200"}, + "username": "admin", + "password": "testing", + "protocol": "http", + "enabled": true, + "index": "foo-index", + }), + }, + }, + { + Id: "input-unit-1", + Type: proto.UnitType_INPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "log-input", + Type: "log", + Name: "log", + Streams: []*proto.Stream{ + { + Id: "log-input-1", + Source: requireNewStruct(t, map[string]interface{}{ + "enabled": true, + "type": "log", + "paths": []interface{}{"/tmp/flog.log"}, + "index": "foo-index", + }), + }, + }, + }, + }, + }, + { + { + Id: "output-unit", + Type: proto.UnitType_OUTPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "default", + Type: "elasticsearch", + Name: "elasticsearch", + Source: requireNewStruct(t, + map[string]interface{}{ + "type": "elasticsearch", + "hosts": []interface{}{"http://localhost:9200"}, + "username": "admin", + "password": "testing", + "protocol": "http", + "enabled": true, + "index": "foo-index", + }), + }, + }, + { + Id: "input-unit-2", + Type: proto.UnitType_INPUT, + ConfigStateIdx: 1, + State: proto.State_HEALTHY, + LogLevel: proto.UnitLogLevel_DEBUG, + Config: &proto.UnitExpectedConfig{ + Id: "log-input", + Type: "log", + Name: "log", + Streams: []*proto.Stream{ + { + Id: "log-input-2", + Source: requireNewStruct(t, map[string]interface{}{ + "enabled": true, + "type": "log", + "paths": []interface{}{"/tmp/flog.log"}, + "index": "foo-index", + }), + }, + }, + }, + }, + }, + } + + idx := 0 + waiting := false + when := time.Now() + nextState := func() { + if waiting { + if time.Now().After(when) { + idx = (idx + 1) % len(units) + waiting = false + t.Logf("done waiting, new state is %d", idx) + return + } + return + } + waiting = true + when = time.Now().Add(10 * time.Second) + } + server := &mock.StubServerV2{ + CheckinV2Impl: func(observed *proto.CheckinObserved) *proto.CheckinExpected { + t.Log("====================================================================================================") + defer t.Log("====================================================================================================") + t.Logf("[%s] Got %d units", time.Now().Format(time.RFC3339), len(observed.Units)) + if doesStateMatch(observed, units[idx], 0) { + t.Logf("++++++++++ reached desired state, sending units[%d]", idx) + nextState() + } + for i, unit := range observed.GetUnits() { + t.Logf("Unit %d", i) + t.Logf("ID %s, Type: %s, Message: %s, State %s, Payload %s", + unit.GetId(), + unit.GetType(), + unit.GetMessage(), + unit.GetState(), + unit.GetPayload().String()) + + if state := unit.GetState(); !(state == proto.State_HEALTHY || state != proto.State_CONFIGURING || state == proto.State_STARTING) { + t.Fatalf("Unit '%s' is not healthy, state: %s", unit.GetId(), unit.GetState().String()) + } + } + return &proto.CheckinExpected{ + // AgentInfo: agentInfo, + // Features: features[i], + // FeaturesIdx: featuresIdxs[i], + Units: units[idx], + } + }, + ActionImpl: func(response *proto.ActionResponse) error { + return nil + }, + ActionsChan: make(chan *mock.PerformAction, 100), + } + + require.NoError(t, server.Start()) + defer server.Stop() + t.Logf("server started on port %d", server.Port) + + p := NewProc(t, "../../filebeat/filebeat.test", nil, server.Port) + p.Start() + t.Log("Filebeat started") + + p.LogContains("Can only start an input when all related states are finished", 2*time.Minute) // centralmgmt + p.LogContains("file 'flog.log' is not finished, will retry starting the input soon", 2*time.Minute) // centralmgmt.V2-manager + p.LogContains("ForceReload set to TRUE", 2*time.Minute) // centralmgmt.V2-manager + p.LogContains("Reloading Beats inputs because forceReload is true", 2*time.Minute) // centralmgmt.V2-manager + p.LogContains("ForceReload set to FALSE", 2*time.Minute) // centralmgmt.V2-manager + t.Log("********************************************* IT WORKS ****************************************************************************************************") +} diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index d2131de61bb7..00c3b3918722 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -16,6 +16,8 @@ import ( "github.com/joeshaw/multierror" "go.uber.org/zap/zapcore" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" gproto "google.golang.org/protobuf/proto" "gopkg.in/yaml.v2" @@ -112,22 +114,38 @@ func init() { // This is registered as the manager factory in init() so that calls to // lbmanagement.NewManager will be forwarded here. func NewV2AgentManager(config *conf.C, registry *reload.Registry) (lbmanagement.Manager, error) { + logger := logp.NewLogger(lbmanagement.DebugK).Named("V2-manager") c := DefaultConfig() if config.Enabled() { if err := config.Unpack(&c); err != nil { return nil, fmt.Errorf("parsing fleet management settings: %w", err) } } - agentClient, _, err := client.NewV2FromReader(os.Stdin, client.VersionInfo{ - Name: "beat-v2-client", - Version: version.GetDefaultVersion(), - Meta: map[string]string{ - "commit": version.Commit(), - "build_time": version.BuildTime().String(), - }, - }) - if err != nil { - return nil, fmt.Errorf("error reading control config from agent: %w", err) + + var agentClient client.V2 + var err error + if c.InsecureGRPCURLForTesting != "" && c.Enabled { + // Insecure for testing Elastic-Agent-Client initialisation + logger.Info("Using INSECURE GRPC connection, this should be only used for testing!") + agentClient = client.NewV2(c.InsecureGRPCURLForTesting, + "", // Insecure connection for test, no token needed + client.VersionInfo{ + Name: "testing program", + Version: "v1.0.0", + }, grpc.WithTransportCredentials(insecure.NewCredentials())) + } else { + // Normal Elastic-Agent-Client initialisation + agentClient, _, err = client.NewV2FromReader(os.Stdin, client.VersionInfo{ + Name: "beat-v2-client", + Version: version.GetDefaultVersion(), + Meta: map[string]string{ + "commit": version.Commit(), + "build_time": version.BuildTime().String(), + }, + }) + if err != nil { + return nil, fmt.Errorf("error reading control config from agent: %w", err) + } } // officially running under the elastic-agent; we set the publisher pipeline @@ -420,7 +438,8 @@ func (cm *BeatV2Manager) unitListen() { return case change := <-cm.client.UnitChanges(): cm.logger.Infof( - "BeatV2Manager.unitListen UnitChanged.Type(%s), UnitChanged.Trigger(%d): %s/%s", + "BeatV2Manager.unitListen UnitChanged.ID(%s), UnitChanged.Type(%s), UnitChanged.Trigger(%d): %s/%s", + change.Unit.ID(), change.Type, int64(change.Triggers), change.Type, change.Triggers) switch change.Type { From a1ac3bbab6407a2411c6c6f164b6bd6a3dc9d9f2 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 5 May 2023 15:20:28 +0200 Subject: [PATCH 13/46] improve tests, move files to Filebeat --- x-pack/filebeat/magefile.go | 1 + .../tests/integration}/framework_test.go | 2 +- .../tests/integration}/integ_test.go | 49 ++++++++++++++++++- 3 files changed, 49 insertions(+), 3 deletions(-) rename x-pack/{libbeat/management => filebeat/tests/integration}/framework_test.go (98%) rename x-pack/{libbeat/management => filebeat/tests/integration}/integ_test.go (83%) diff --git a/x-pack/filebeat/magefile.go b/x-pack/filebeat/magefile.go index 0474e41e61e9..bf8f0e7040f1 100644 --- a/x-pack/filebeat/magefile.go +++ b/x-pack/filebeat/magefile.go @@ -179,6 +179,7 @@ func IntegTest() { // GoIntegTest starts the docker containers and executes the Go integration tests. func GoIntegTest(ctx context.Context) error { + mg.Deps(BuildSystemTestBinary) return devtools.GoIntegTestFromHost(ctx, devtools.DefaultGoTestIntegrationFromHostArgs()) } diff --git a/x-pack/libbeat/management/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go similarity index 98% rename from x-pack/libbeat/management/framework_test.go rename to x-pack/filebeat/tests/integration/framework_test.go index ade2225d226a..5fe26060b734 100644 --- a/x-pack/libbeat/management/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -79,8 +79,8 @@ func (p *Proc) LogContains(s string, timeout time.Duration) bool { default: if p.stdout.Scan() { line := p.stdout.Text() + // fmt.Println(line) if strings.Contains(line, s) { - fmt.Println(line) return true } } diff --git a/x-pack/libbeat/management/integ_test.go b/x-pack/filebeat/tests/integration/integ_test.go similarity index 83% rename from x-pack/libbeat/management/integ_test.go rename to x-pack/filebeat/tests/integration/integ_test.go index 6b26cca9c67c..6ec17a18159d 100644 --- a/x-pack/libbeat/management/integ_test.go +++ b/x-pack/filebeat/tests/integration/integ_test.go @@ -10,9 +10,12 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/structpb" + + "github.com/elastic/elastic-agent-client/v7/pkg/client" "github.com/elastic/elastic-agent-client/v7/pkg/client/mock" "github.com/elastic/elastic-agent-client/v7/pkg/proto" - "github.com/stretchr/testify/require" ) func TestPureServe(t *testing.T) { @@ -168,7 +171,7 @@ func TestPureServe(t *testing.T) { defer server.Stop() t.Logf("server started on port %d", server.Port) - p := NewProc(t, "../../filebeat/filebeat.test", nil, server.Port) + p := NewProc(t, "../../filebeat.test", nil, server.Port) p.Start() t.Log("Filebeat started") @@ -179,3 +182,45 @@ func TestPureServe(t *testing.T) { p.LogContains("ForceReload set to FALSE", 2*time.Minute) // centralmgmt.V2-manager t.Log("********************************************* IT WORKS ****************************************************************************************************") } + +func doesStateMatch( + observed *proto.CheckinObserved, + expectedUnits []*proto.UnitExpected, + expectedFeaturesIdx uint64, +) bool { + if len(observed.Units) != len(expectedUnits) { + return false + } + expectedMap := make(map[unitKey]*proto.UnitExpected) + for _, exp := range expectedUnits { + expectedMap[unitKey{client.UnitType(exp.Type), exp.Id}] = exp + } + for _, unit := range observed.Units { + exp, ok := expectedMap[unitKey{client.UnitType(unit.Type), unit.Id}] + if !ok { + return false + } + if unit.State != exp.State || unit.ConfigStateIdx != exp.ConfigStateIdx { + return false + } + } + + if observed.FeaturesIdx != expectedFeaturesIdx { + return false + } + + return true +} + +type unitKey struct { + Type client.UnitType + ID string +} + +func requireNewStruct(t *testing.T, v map[string]interface{}) *structpb.Struct { + str, err := structpb.NewStruct(v) + if err != nil { + require.NoError(t, err) + } + return str +} From b149c8504cc0cf71c1c02d661d5b48e474eac784 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 5 May 2023 17:29:53 +0200 Subject: [PATCH 14/46] clean up tests --- .../tests/integration/framework_test.go | 6 ---- .../filebeat/tests/integration/integ_test.go | 33 +++++-------------- 2 files changed, 8 insertions(+), 31 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 5fe26060b734..3f6249542955 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -29,9 +29,6 @@ func NewProc(t *testing.T, binary string, args []string, port int) Proc { Args: append([]string{ "--systemTest", "-e", - "-d", - // "*", - "centralmgmt, centralmgmt.V2-manager", "-E", fmt.Sprintf("management.insecure_grpc_url_for_testing=\"localhost:%d\"", port), "-E", @@ -65,9 +62,6 @@ func (p *Proc) Start() { } func (p *Proc) LogContains(s string, timeout time.Duration) bool { - p.t.Log("LogContans called") - defer p.t.Log("LogContans done") - ticker := time.NewTicker(10 * time.Millisecond) defer ticker.Stop() diff --git a/x-pack/filebeat/tests/integration/integ_test.go b/x-pack/filebeat/tests/integration/integ_test.go index 6ec17a18159d..bd8dcfcf6e3b 100644 --- a/x-pack/filebeat/tests/integration/integ_test.go +++ b/x-pack/filebeat/tests/integration/integ_test.go @@ -39,7 +39,6 @@ func TestPureServe(t *testing.T) { "password": "testing", "protocol": "http", "enabled": true, - "index": "foo-index", }), }, }, @@ -60,7 +59,6 @@ func TestPureServe(t *testing.T) { "enabled": true, "type": "log", "paths": []interface{}{"/tmp/flog.log"}, - "index": "foo-index", }), }, }, @@ -86,7 +84,6 @@ func TestPureServe(t *testing.T) { "password": "testing", "protocol": "http", "enabled": true, - "index": "foo-index", }), }, }, @@ -107,7 +104,6 @@ func TestPureServe(t *testing.T) { "enabled": true, "type": "log", "paths": []interface{}{"/tmp/flog.log"}, - "index": "foo-index", }), }, }, @@ -124,7 +120,6 @@ func TestPureServe(t *testing.T) { if time.Now().After(when) { idx = (idx + 1) % len(units) waiting = false - t.Logf("done waiting, new state is %d", idx) return } return @@ -134,30 +129,15 @@ func TestPureServe(t *testing.T) { } server := &mock.StubServerV2{ CheckinV2Impl: func(observed *proto.CheckinObserved) *proto.CheckinExpected { - t.Log("====================================================================================================") - defer t.Log("====================================================================================================") - t.Logf("[%s] Got %d units", time.Now().Format(time.RFC3339), len(observed.Units)) if doesStateMatch(observed, units[idx], 0) { - t.Logf("++++++++++ reached desired state, sending units[%d]", idx) nextState() } - for i, unit := range observed.GetUnits() { - t.Logf("Unit %d", i) - t.Logf("ID %s, Type: %s, Message: %s, State %s, Payload %s", - unit.GetId(), - unit.GetType(), - unit.GetMessage(), - unit.GetState(), - unit.GetPayload().String()) - + for _, unit := range observed.GetUnits() { if state := unit.GetState(); !(state == proto.State_HEALTHY || state != proto.State_CONFIGURING || state == proto.State_STARTING) { t.Fatalf("Unit '%s' is not healthy, state: %s", unit.GetId(), unit.GetState().String()) } } return &proto.CheckinExpected{ - // AgentInfo: agentInfo, - // Features: features[i], - // FeaturesIdx: featuresIdxs[i], Units: units[idx], } }, @@ -169,18 +149,21 @@ func TestPureServe(t *testing.T) { require.NoError(t, server.Start()) defer server.Stop() - t.Logf("server started on port %d", server.Port) - p := NewProc(t, "../../filebeat.test", nil, server.Port) + p := NewProc( + t, + "../../filebeat.test", + []string{"-d", + "centralmgmt, centralmgmt.V2-manager", + }, + server.Port) p.Start() - t.Log("Filebeat started") p.LogContains("Can only start an input when all related states are finished", 2*time.Minute) // centralmgmt p.LogContains("file 'flog.log' is not finished, will retry starting the input soon", 2*time.Minute) // centralmgmt.V2-manager p.LogContains("ForceReload set to TRUE", 2*time.Minute) // centralmgmt.V2-manager p.LogContains("Reloading Beats inputs because forceReload is true", 2*time.Minute) // centralmgmt.V2-manager p.LogContains("ForceReload set to FALSE", 2*time.Minute) // centralmgmt.V2-manager - t.Log("********************************************* IT WORKS ****************************************************************************************************") } func doesStateMatch( From dda54d93fbd6621bed1db15b5b4127180638442f Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 5 May 2023 20:09:47 +0200 Subject: [PATCH 15/46] [WIP] Use log file for Filebeat Uses a log file so we can keep it in case of a failure. That helps debugging. --- .../tests/integration/framework_test.go | 89 ++++++++++++++---- .../filebeat/tests/integration/integ_test.go | 90 ++++++++++++++++--- 2 files changed, 150 insertions(+), 29 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 3f6249542955..b60805c27b27 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -7,33 +7,39 @@ package management import ( "bufio" "fmt" + "os" "os/exec" "path/filepath" "strings" "testing" "time" + + "github.com/stretchr/testify/require" ) type Proc struct { - Binary string - Args []string - Cmd *exec.Cmd - t *testing.T - stdout *bufio.Scanner + Binary string + Args []string + Cmd *exec.Cmd + t *testing.T + tempDir string } -func NewProc(t *testing.T, binary string, args []string, port int) Proc { +// NewBeat createa a new Beat process from the system tests binary. +// It sets some requried options like the home path, logging, etc. +func NewBeat(t *testing.T, binary string, args []string, tempDir string) Proc { p := Proc{ t: t, Binary: binary, Args: append([]string{ "--systemTest", - "-e", - "-E", - fmt.Sprintf("management.insecure_grpc_url_for_testing=\"localhost:%d\"", port), - "-E", - "management.enabled=true", + // "-e", + "--path.home", tempDir, + "--path.logs", tempDir, + "-E", "logging.to_files=true", + "-E", "logging.files.rotateeverybytes=104857600", // About 100MB }, args...), + tempDir: tempDir, } return p } @@ -44,11 +50,6 @@ func (p *Proc) Start() { p.t.Fatalf("could got get full path from %q, err: %s", p.Binary, err) } p.Cmd = exec.Command(fullPath, p.Args...) - stdout, err := p.Cmd.StderrPipe() - if err != nil { - p.t.Fatalf("could not get stdout pipe for process, err: %s", err) - } - p.stdout = bufio.NewScanner(stdout) if err := p.Cmd.Start(); err != nil { p.t.Fatalf("could not start process: %s", err) @@ -62,6 +63,8 @@ func (p *Proc) Start() { } func (p *Proc) LogContains(s string, timeout time.Duration) bool { + logFile := p.openLogFile() + scanner := bufio.NewScanner(logFile) ticker := time.NewTicker(10 * time.Millisecond) defer ticker.Stop() @@ -71,15 +74,65 @@ func (p *Proc) LogContains(s string, timeout time.Duration) bool { for { select { default: - if p.stdout.Scan() { - line := p.stdout.Text() + // fmt.Print(".") + if scanner.Scan() { + // fmt.Print("+") + line := scanner.Text() // fmt.Println(line) if strings.Contains(line, s) { + fmt.Println(line) return true } } + // scanner.Scan() will return false when it reaches the end of the file, + // then it will stop reading from the file. + // So if it's error is nil, we create a new scanner + if err := scanner.Err(); err == nil { + scanner = bufio.NewScanner(logFile) + // fmt.Println("got no error, creating new scanner") + } case <-timer.C: p.t.Fatal("timeout") } } } + +// openLogFile opens the log file for reading and returns it. +// It also registers a cleanup function to close the file +// when the test ends. +func (p *Proc) openLogFile() *os.File { + t := p.t + glob := fmt.Sprintf("%s-*.ndjson", filepath.Join(p.tempDir, "filebeat")) + files, err := filepath.Glob(glob) + if err != nil { + t.Fatalf("could not expand log file glob: %s", err) + } + t.Log("Glob:", glob, files) + + require.Eventually(t, func() bool { + files, err = filepath.Glob(glob) + if err != nil { + t.Fatalf("could not expand log file glob: %s", err) + } + t.Log("Glob:", glob, files) + if len(files) == 1 { + return true + } + + return false + }, 5*time.Second, 100*time.Millisecond, "waiting for log file") + // On a normal operation there must be a single log, if there are more + // than one, then there is an issue and the Beat is logging too much, + // which is enough to stop the test + if len(files) != 1 { + t.Fatalf("there must be only one log file for %s, found: %d", + glob, len(files)) + } + + f, err := os.Open(files[0]) + if err != nil { + t.Fatalf("could not open log file '%s': %s", files[0], err) + } + t.Cleanup(func() { f.Close() }) + return f +} diff --git a/x-pack/filebeat/tests/integration/integ_test.go b/x-pack/filebeat/tests/integration/integ_test.go index bd8dcfcf6e3b..126d8f3eda2a 100644 --- a/x-pack/filebeat/tests/integration/integ_test.go +++ b/x-pack/filebeat/tests/integration/integ_test.go @@ -7,6 +7,9 @@ package management import ( + "fmt" + "os" + "path/filepath" "testing" "time" @@ -18,7 +21,65 @@ import ( "github.com/elastic/elastic-agent-client/v7/pkg/proto" ) +func generateLogFile(t *testing.T, fullPath string) { + t.Helper() + f, err := os.Create(fullPath) + if err != nil { + t.Fatalf("could not create file '%s: %s", fullPath, err) + } + + go func() { + t.Helper() + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + defer f.Close() + for { + now := <-ticker.C + _, err := fmt.Fprintln(f, now.Format(time.RFC3339)) + if err != nil { + // The Go compiler does not allow me to call t.Fatalf from a non-test + // goroutine, so just log it instead + t.Errorf("could not write data to log file '%s': %s", fullPath, err) + return + } + // make sure log lines are synced as quickly as possible + if err := f.Sync(); err != nil { + t.Errorf("could not sync file '%s': %s", fullPath, err) + } + } + }() +} + func TestPureServe(t *testing.T) { + // We create our own temp dir so the files can be persisted + // in case the test fails. This will help debugging issues on CI + // + // testFailed will be set to 'false' as the very last thing on this test, + // it allows us to use t.CleanUp to remove the temporary files + testFailed := true + tempDir, err := filepath.Abs(filepath.Join("../../build/integration-tests/", + fmt.Sprintf("%s-%d", t.Name(), time.Now().Unix()))) + if err != nil { + t.Fatal(err) + } + + if err := os.MkdirAll(tempDir, 0766); err != nil { + t.Fatalf("cannot create tmp dir: %#v, msg: %s", err, err.Error()) + } + defer func() { testFailed = true }() //debug stuff + t.Cleanup(func() { + if !testFailed { + if err := os.RemoveAll(tempDir); err != nil { + t.Fatalf("could not remove temp dir '%s': %s", tempDir, err) + } + t.Logf("temprary directory '%s' removed", tempDir) + } + }) + + t.Logf("temporary directory: %s", tempDir) + + logFilePath := filepath.Join(tempDir, "flog.log") + generateLogFile(t, logFilePath) var units = [][]*proto.UnitExpected{ { { @@ -58,7 +119,7 @@ func TestPureServe(t *testing.T) { Source: requireNewStruct(t, map[string]interface{}{ "enabled": true, "type": "log", - "paths": []interface{}{"/tmp/flog.log"}, + "paths": []interface{}{logFilePath}, }), }, }, @@ -103,7 +164,7 @@ func TestPureServe(t *testing.T) { Source: requireNewStruct(t, map[string]interface{}{ "enabled": true, "type": "log", - "paths": []interface{}{"/tmp/flog.log"}, + "paths": []interface{}{logFilePath}, }), }, }, @@ -150,20 +211,27 @@ func TestPureServe(t *testing.T) { require.NoError(t, server.Start()) defer server.Stop() - p := NewProc( + p := NewBeat( t, "../../filebeat.test", - []string{"-d", - "centralmgmt, centralmgmt.V2-manager", + []string{ + // "-d", "centralmgmt, centralmgmt.V2-manager", + "-E", fmt.Sprintf("management.insecure_grpc_url_for_testing=\"localhost:%d\"", server.Port), + "-E", "management.enabled=true", }, - server.Port) + tempDir, + ) + p.Start() - p.LogContains("Can only start an input when all related states are finished", 2*time.Minute) // centralmgmt - p.LogContains("file 'flog.log' is not finished, will retry starting the input soon", 2*time.Minute) // centralmgmt.V2-manager - p.LogContains("ForceReload set to TRUE", 2*time.Minute) // centralmgmt.V2-manager - p.LogContains("Reloading Beats inputs because forceReload is true", 2*time.Minute) // centralmgmt.V2-manager - p.LogContains("ForceReload set to FALSE", 2*time.Minute) // centralmgmt.V2-manager + p.LogContains("Can only start an input when all related states are finished", 2*time.Minute) // logger: centralmgmt + p.LogContains("file 'flog.log' is not finished, will retry starting the input soon", 2*time.Minute) // logger: centralmgmt.V2-manager + p.LogContains("ForceReload set to TRUE", 2*time.Minute) // logger: centralmgmt.V2-manager + p.LogContains("Reloading Beats inputs because forceReload is true", 2*time.Minute) // logger: centralmgmt.V2-manager + p.LogContains("ForceReload set to FALSE", 2*time.Minute) // logger: centralmgmt.V2-manager + + // Set it to false, so the temporaty directory is removed + testFailed = false } func doesStateMatch( From d3e45a29d3e0543a303aee135f297570ccb0d217 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 8 May 2023 14:49:05 +0200 Subject: [PATCH 16/46] clean up integration tests --- .../tests/integration/framework_test.go | 63 +++++++------ .../filebeat/tests/integration/integ_test.go | 89 ++++++++++--------- x-pack/libbeat/management/managerV2_test.go | 31 +------ x-pack/libbeat/management/test_utils.go | 35 ++++++++ 4 files changed, 114 insertions(+), 104 deletions(-) create mode 100644 x-pack/libbeat/management/test_utils.go diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index b60805c27b27..fce07ff5b2e1 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -2,7 +2,9 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package management +//go:build integration + +package input import ( "bufio" @@ -17,7 +19,7 @@ import ( "github.com/stretchr/testify/require" ) -type Proc struct { +type BeatProc struct { Binary string Args []string Cmd *exec.Cmd @@ -27,13 +29,12 @@ type Proc struct { // NewBeat createa a new Beat process from the system tests binary. // It sets some requried options like the home path, logging, etc. -func NewBeat(t *testing.T, binary string, args []string, tempDir string) Proc { - p := Proc{ +func NewBeat(t *testing.T, binary string, args []string, tempDir string) BeatProc { + p := BeatProc{ t: t, Binary: binary, Args: append([]string{ "--systemTest", - // "-e", "--path.home", tempDir, "--path.logs", tempDir, "-E", "logging.to_files=true", @@ -44,29 +45,29 @@ func NewBeat(t *testing.T, binary string, args []string, tempDir string) Proc { return p } -func (p *Proc) Start() { - fullPath, err := filepath.Abs(p.Binary) +func (b *BeatProc) Start() { + fullPath, err := filepath.Abs(b.Binary) if err != nil { - p.t.Fatalf("could got get full path from %q, err: %s", p.Binary, err) + b.t.Fatalf("could got get full path from %q, err: %s", b.Binary, err) } - p.Cmd = exec.Command(fullPath, p.Args...) + b.Cmd = exec.Command(fullPath, b.Args...) - if err := p.Cmd.Start(); err != nil { - p.t.Fatalf("could not start process: %s", err) + if err := b.Cmd.Start(); err != nil { + b.t.Fatalf("could not start process: %s", err) } - p.t.Cleanup(func() { - pid := p.Cmd.Process.Pid - if err := p.Cmd.Process.Kill(); err != nil { - p.t.Fatalf("could not stop process with PID: %d, err: %s", pid, err) + b.t.Cleanup(func() { + pid := b.Cmd.Process.Pid + if err := b.Cmd.Process.Kill(); err != nil { + b.t.Fatalf("could not stop process with PID: %d, err: %s", pid, err) } }) } -func (p *Proc) LogContains(s string, timeout time.Duration) bool { - logFile := p.openLogFile() +// LogContains looks for s as a sub string of every log line, +// if it's not found until timeout is reached, the test fails +func (b *BeatProc) LogContains(s string, timeout time.Duration) bool { + logFile := b.openLogFile() scanner := bufio.NewScanner(logFile) - ticker := time.NewTicker(10 * time.Millisecond) - defer ticker.Stop() timer := time.NewTimer(timeout) defer timer.Stop() @@ -74,25 +75,21 @@ func (p *Proc) LogContains(s string, timeout time.Duration) bool { for { select { default: - // fmt.Print(".") if scanner.Scan() { - // fmt.Print("+") - line := scanner.Text() - // fmt.Println(line) - if strings.Contains(line, s) { - fmt.Println(line) + if strings.Contains(scanner.Text(), s) { return true } } // scanner.Scan() will return false when it reaches the end of the file, // then it will stop reading from the file. // So if it's error is nil, we create a new scanner + // + // This kind of creates a busy loop, but for a test it's acceptable. if err := scanner.Err(); err == nil { scanner = bufio.NewScanner(logFile) - // fmt.Println("got no error, creating new scanner") } case <-timer.C: - p.t.Fatal("timeout") + b.t.Fatalf("timeout reached while trying to find '%s' in the logs", s) } } } @@ -100,27 +97,27 @@ func (p *Proc) LogContains(s string, timeout time.Duration) bool { // openLogFile opens the log file for reading and returns it. // It also registers a cleanup function to close the file // when the test ends. -func (p *Proc) openLogFile() *os.File { - t := p.t - glob := fmt.Sprintf("%s-*.ndjson", filepath.Join(p.tempDir, "filebeat")) +func (b *BeatProc) openLogFile() *os.File { + t := b.t + glob := fmt.Sprintf("%s-*.ndjson", filepath.Join(b.tempDir, "filebeat")) files, err := filepath.Glob(glob) if err != nil { t.Fatalf("could not expand log file glob: %s", err) } - t.Log("Glob:", glob, files) require.Eventually(t, func() bool { files, err = filepath.Glob(glob) if err != nil { t.Fatalf("could not expand log file glob: %s", err) } - t.Log("Glob:", glob, files) if len(files) == 1 { return true } return false - }, 5*time.Second, 100*time.Millisecond, "waiting for log file") + }, 5*time.Second, 100*time.Millisecond, + "waiting for log file matching glob '%s' to be created", glob) + // On a normal operation there must be a single log, if there are more // than one, then there is an issue and the Beat is logging too much, // which is enough to stop the test diff --git a/x-pack/filebeat/tests/integration/integ_test.go b/x-pack/filebeat/tests/integration/integ_test.go index 126d8f3eda2a..735b5056c336 100644 --- a/x-pack/filebeat/tests/integration/integ_test.go +++ b/x-pack/filebeat/tests/integration/integ_test.go @@ -4,7 +4,7 @@ //go:build integration -package management +package input import ( "fmt" @@ -16,41 +16,21 @@ import ( "github.com/stretchr/testify/require" "google.golang.org/protobuf/types/known/structpb" + "github.com/elastic/beats/v7/x-pack/libbeat/management" "github.com/elastic/elastic-agent-client/v7/pkg/client" "github.com/elastic/elastic-agent-client/v7/pkg/client/mock" "github.com/elastic/elastic-agent-client/v7/pkg/proto" ) -func generateLogFile(t *testing.T, fullPath string) { - t.Helper() - f, err := os.Create(fullPath) - if err != nil { - t.Fatalf("could not create file '%s: %s", fullPath, err) - } - - go func() { - t.Helper() - ticker := time.NewTicker(time.Second) - defer ticker.Stop() - defer f.Close() - for { - now := <-ticker.C - _, err := fmt.Fprintln(f, now.Format(time.RFC3339)) - if err != nil { - // The Go compiler does not allow me to call t.Fatalf from a non-test - // goroutine, so just log it instead - t.Errorf("could not write data to log file '%s': %s", fullPath, err) - return - } - // make sure log lines are synced as quickly as possible - if err := f.Sync(); err != nil { - t.Errorf("could not sync file '%s': %s", fullPath, err) - } - } - }() -} - -func TestPureServe(t *testing.T) { +// TestInputReloadUnderElasticAgent will start a Filebeat and cause the input +// reload issue described on https://github.com/elastic/beats/issues/33653 +// to happen, then it will check to logs to ensure the fix is working. +// +// In case of a test failure the directory with Filebeat logs and +// all other supporting files will be kept on build/integration-tests. +// +// Run the tests wit -v flag to print the temporary folder used. +func TestInputReloadUnderElasticAgent(t *testing.T) { // We create our own temp dir so the files can be persisted // in case the test fails. This will help debugging issues on CI // @@ -66,17 +46,16 @@ func TestPureServe(t *testing.T) { if err := os.MkdirAll(tempDir, 0766); err != nil { t.Fatalf("cannot create tmp dir: %#v, msg: %s", err, err.Error()) } - defer func() { testFailed = true }() //debug stuff t.Cleanup(func() { if !testFailed { if err := os.RemoveAll(tempDir); err != nil { t.Fatalf("could not remove temp dir '%s': %s", tempDir, err) } - t.Logf("temprary directory '%s' removed", tempDir) + t.Logf("Temprary directory '%s' removed", tempDir) } }) - t.Logf("temporary directory: %s", tempDir) + t.Logf("Temporary directory: %s", tempDir) logFilePath := filepath.Join(tempDir, "flog.log") generateLogFile(t, logFilePath) @@ -190,7 +169,7 @@ func TestPureServe(t *testing.T) { } server := &mock.StubServerV2{ CheckinV2Impl: func(observed *proto.CheckinObserved) *proto.CheckinExpected { - if doesStateMatch(observed, units[idx], 0) { + if management.DoesStateMatch(observed, units[idx], 0) { nextState() } for _, unit := range observed.GetUnits() { @@ -215,7 +194,6 @@ func TestPureServe(t *testing.T) { t, "../../filebeat.test", []string{ - // "-d", "centralmgmt, centralmgmt.V2-manager", "-E", fmt.Sprintf("management.insecure_grpc_url_for_testing=\"localhost:%d\"", server.Port), "-E", "management.enabled=true", }, @@ -224,11 +202,11 @@ func TestPureServe(t *testing.T) { p.Start() - p.LogContains("Can only start an input when all related states are finished", 2*time.Minute) // logger: centralmgmt - p.LogContains("file 'flog.log' is not finished, will retry starting the input soon", 2*time.Minute) // logger: centralmgmt.V2-manager - p.LogContains("ForceReload set to TRUE", 2*time.Minute) // logger: centralmgmt.V2-manager - p.LogContains("Reloading Beats inputs because forceReload is true", 2*time.Minute) // logger: centralmgmt.V2-manager - p.LogContains("ForceReload set to FALSE", 2*time.Minute) // logger: centralmgmt.V2-manager + p.LogContains("Can only start an input when all related states are finished", 5*time.Minute) // logger: centralmgmt + p.LogContains("file 'flog.log' is not finished, will retry starting the input soon", 5*time.Minute) // logger: centralmgmt.V2-manager + p.LogContains("ForceReload set to TRUE", 5*time.Minute) // logger: centralmgmt.V2-manager + p.LogContains("Reloading Beats inputs because forceReload is true", 5*time.Minute) // logger: centralmgmt.V2-manager + p.LogContains("ForceReload set to FALSE", 5*time.Minute) // logger: centralmgmt.V2-manager // Set it to false, so the temporaty directory is removed testFailed = false @@ -275,3 +253,32 @@ func requireNewStruct(t *testing.T, v map[string]interface{}) *structpb.Struct { } return str } + +func generateLogFile(t *testing.T, fullPath string) { + t.Helper() + f, err := os.Create(fullPath) + if err != nil { + t.Fatalf("could not create file '%s: %s", fullPath, err) + } + + go func() { + t.Helper() + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + defer f.Close() + for { + now := <-ticker.C + _, err := fmt.Fprintln(f, now.Format(time.RFC3339)) + if err != nil { + // The Go compiler does not allow me to call t.Fatalf from a non-test + // goroutine, so just log it instead + t.Errorf("could not write data to log file '%s': %s", fullPath, err) + return + } + // make sure log lines are synced as quickly as possible + if err := f.Sync(); err != nil { + t.Errorf("could not sync file '%s': %s", fullPath, err) + } + } + }() +} diff --git a/x-pack/libbeat/management/managerV2_test.go b/x-pack/libbeat/management/managerV2_test.go index 952263a43fbc..c86e84c94898 100644 --- a/x-pack/libbeat/management/managerV2_test.go +++ b/x-pack/libbeat/management/managerV2_test.go @@ -239,7 +239,7 @@ func mockSrv( if observedCallback != nil { observedCallback(observed, i) } - matches := doesStateMatch(observed, units[i], featuresIdxs[i]) + matches := DoesStateMatch(observed, units[i], featuresIdxs[i]) if !matches { // send same set of units and features return &proto.CheckinExpected{ @@ -274,35 +274,6 @@ func mockSrv( } } -func doesStateMatch( - observed *proto.CheckinObserved, - expectedUnits []*proto.UnitExpected, - expectedFeaturesIdx uint64, -) bool { - if len(observed.Units) != len(expectedUnits) { - return false - } - expectedMap := make(map[unitKey]*proto.UnitExpected) - for _, exp := range expectedUnits { - expectedMap[unitKey{client.UnitType(exp.Type), exp.Id}] = exp - } - for _, unit := range observed.Units { - exp, ok := expectedMap[unitKey{client.UnitType(unit.Type), unit.Id}] - if !ok { - return false - } - if unit.State != exp.State || unit.ConfigStateIdx != exp.ConfigStateIdx { - return false - } - } - - if observed.FeaturesIdx != expectedFeaturesIdx { - return false - } - - return true -} - type reloadable struct { mx sync.Mutex config *reload.ConfigWithMeta diff --git a/x-pack/libbeat/management/test_utils.go b/x-pack/libbeat/management/test_utils.go new file mode 100644 index 000000000000..53715f44a36c --- /dev/null +++ b/x-pack/libbeat/management/test_utils.go @@ -0,0 +1,35 @@ +package management + +import ( + "github.com/elastic/elastic-agent-client/v7/pkg/client" + "github.com/elastic/elastic-agent-client/v7/pkg/proto" +) + +func DoesStateMatch( + observed *proto.CheckinObserved, + expectedUnits []*proto.UnitExpected, + expectedFeaturesIdx uint64, +) bool { + if len(observed.Units) != len(expectedUnits) { + return false + } + expectedMap := make(map[unitKey]*proto.UnitExpected) + for _, exp := range expectedUnits { + expectedMap[unitKey{client.UnitType(exp.Type), exp.Id}] = exp + } + for _, unit := range observed.Units { + exp, ok := expectedMap[unitKey{client.UnitType(unit.Type), unit.Id}] + if !ok { + return false + } + if unit.State != exp.State || unit.ConfigStateIdx != exp.ConfigStateIdx { + return false + } + } + + if observed.FeaturesIdx != expectedFeaturesIdx { + return false + } + + return true +} From e65618b0d639f4dc6fbd916693cfcf9ef63563d5 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 8 May 2023 15:57:42 +0200 Subject: [PATCH 17/46] debounce and force reload times are configurable --- .../libbeat/management/input_reload_test.go | 12 ++++-- x-pack/libbeat/management/managerV2.go | 43 +++++++++++++------ x-pack/libbeat/management/managerV2_test.go | 1 - x-pack/libbeat/management/test_utils.go | 4 ++ 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index 64a2eeac0c95..68001be605cd 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -140,9 +140,15 @@ func TestInputReload(t *testing.T) { }, }, grpc.WithTransportCredentials(insecure.NewCredentials())) - m, err := NewV2AgentManagerWithClient(&Config{ - Enabled: true, - }, r, client) + m, err := NewV2AgentManagerWithClient( + &Config{ + Enabled: true, + }, + r, + client, + WithChangeDebounce(300*time.Millisecond), + WithForceReloadDebounce(800*time.Millisecond), + ) require.NoError(t, err) mm := m.(*BeatV2Manager) diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 00c3b3918722..5ada91ea1e07 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -89,6 +89,13 @@ type BeatV2Manager struct { lastBeatOutputCfg *reload.ConfigWithMeta lastBeatInputCfgs []*reload.ConfigWithMeta lastBeatFeaturesCfg *conf.C + + // changeDebounce is the debounce time for a configuration change + changeDebounce time.Duration + // forceReloadDebounce is the time the manager will wait before + // trying to reload the configuration after an input not finished error + // happens + forceReloadDebounce time.Duration } // ================================ @@ -100,6 +107,20 @@ func WithStopOnEmptyUnits(m *BeatV2Manager) { m.stopOnEmptyUnits = true } +// WithChangeDebounce sets the changeDeboung value +func WithChangeDebounce(d time.Duration) func(b *BeatV2Manager) { + return func(b *BeatV2Manager) { + b.changeDebounce = d + } +} + +// WithForceReloadDebounce sets the forceReloadDebounce value +func WithForceReloadDebounce(d time.Duration) func(b *BeatV2Manager) { + return func(b *BeatV2Manager) { + b.forceReloadDebounce = d + } +} + // ================================ // Init Functions // ================================ @@ -171,6 +192,12 @@ func NewV2AgentManagerWithClient(config *Config, registry *reload.Registry, agen status: lbmanagement.Running, message: "Healthy", stopChan: make(chan struct{}, 1), + changeDebounce: time.Second, + // forceReloadDebounce is greater than changeDebounce because it is only + // used when an input has not reached its finished state, this means some events + // still need to be acked by the acker, hence the longer we wait the more likely + // for the input to have reached its finished state. + forceReloadDebounce: time.Second * 10, } if config.Enabled { @@ -399,21 +426,13 @@ func (cm *BeatV2Manager) watchErrChan(ctx context.Context) { } func (cm *BeatV2Manager) unitListen() { - const changeDebounce = time.Second - - // forceReloadDebounce is greater than changeDebounce because it is only - // used when an input has not reached its finished state, this means some events - // still need to be acked by the acker, hence the longer we wait the more likely - // for the input to have reached its finished state. - const forceReloadDebounce = changeDebounce * 10 - // register signal handler sigc := make(chan os.Signal, 1) signal.Notify(sigc, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) // timer is used to provide debounce on unit changes // this allows multiple changes to come in and only a single reload be performed - t := time.NewTimer(changeDebounce) + t := time.NewTimer(cm.changeDebounce) t.Stop() // starts stopped, until a change occurs cm.logger.Debug("Listening for agent unit changes") @@ -448,11 +467,11 @@ func (cm *BeatV2Manager) unitListen() { case client.UnitChangedAdded: cm.addUnit(change.Unit) // reset can be called here because `<-t.C` is handled in the same select - t.Reset(changeDebounce) + t.Reset(cm.changeDebounce) case client.UnitChangedModified: cm.modifyUnit(change.Unit) // reset can be called here because `<-t.C` is handled in the same select - t.Reset(changeDebounce) + t.Reset(cm.changeDebounce) case client.UnitChangedRemoved: cm.deleteUnit(change.Unit) } @@ -469,7 +488,7 @@ func (cm *BeatV2Manager) unitListen() { cm.reload(units) if cm.forceReload { // Restart the debounce timer so we try to reload the inputs. - t.Reset(forceReloadDebounce) + t.Reset(cm.forceReloadDebounce) } } } diff --git a/x-pack/libbeat/management/managerV2_test.go b/x-pack/libbeat/management/managerV2_test.go index c86e84c94898..538dd0c58023 100644 --- a/x-pack/libbeat/management/managerV2_test.go +++ b/x-pack/libbeat/management/managerV2_test.go @@ -25,7 +25,6 @@ import ( ) func TestManagerV2(t *testing.T) { - r := reload.NewRegistry() output := &reloadable{} diff --git a/x-pack/libbeat/management/test_utils.go b/x-pack/libbeat/management/test_utils.go index 53715f44a36c..b4eecac9b8d6 100644 --- a/x-pack/libbeat/management/test_utils.go +++ b/x-pack/libbeat/management/test_utils.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package management import ( From b3028a72b95cb97391f446d45aa2c5b6204985a6 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 8 May 2023 17:56:26 +0200 Subject: [PATCH 18/46] clean up --- .../tests/integration/framework_test.go | 2 +- .../{integ_test.go => input_reload_test.go} | 37 +------------------ x-pack/libbeat/management/managerV2.go | 4 +- x-pack/libbeat/management/test_utils.go | 2 + 4 files changed, 6 insertions(+), 39 deletions(-) rename x-pack/filebeat/tests/integration/{integ_test.go => input_reload_test.go} (89%) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index fce07ff5b2e1..f7a1834fb645 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -4,7 +4,7 @@ //go:build integration -package input +package integration import ( "bufio" diff --git a/x-pack/filebeat/tests/integration/integ_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go similarity index 89% rename from x-pack/filebeat/tests/integration/integ_test.go rename to x-pack/filebeat/tests/integration/input_reload_test.go index 735b5056c336..13c0cf0ac010 100644 --- a/x-pack/filebeat/tests/integration/integ_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -4,7 +4,7 @@ //go:build integration -package input +package integration import ( "fmt" @@ -17,7 +17,6 @@ import ( "google.golang.org/protobuf/types/known/structpb" "github.com/elastic/beats/v7/x-pack/libbeat/management" - "github.com/elastic/elastic-agent-client/v7/pkg/client" "github.com/elastic/elastic-agent-client/v7/pkg/client/mock" "github.com/elastic/elastic-agent-client/v7/pkg/proto" ) @@ -212,40 +211,6 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { testFailed = false } -func doesStateMatch( - observed *proto.CheckinObserved, - expectedUnits []*proto.UnitExpected, - expectedFeaturesIdx uint64, -) bool { - if len(observed.Units) != len(expectedUnits) { - return false - } - expectedMap := make(map[unitKey]*proto.UnitExpected) - for _, exp := range expectedUnits { - expectedMap[unitKey{client.UnitType(exp.Type), exp.Id}] = exp - } - for _, unit := range observed.Units { - exp, ok := expectedMap[unitKey{client.UnitType(unit.Type), unit.Id}] - if !ok { - return false - } - if unit.State != exp.State || unit.ConfigStateIdx != exp.ConfigStateIdx { - return false - } - } - - if observed.FeaturesIdx != expectedFeaturesIdx { - return false - } - - return true -} - -type unitKey struct { - Type client.UnitType - ID string -} - func requireNewStruct(t *testing.T, v map[string]interface{}) *structpb.Struct { str, err := structpb.NewStruct(v) if err != nil { diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 5ada91ea1e07..907dba468c90 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -151,8 +151,8 @@ func NewV2AgentManager(config *conf.C, registry *reload.Registry) (lbmanagement. agentClient = client.NewV2(c.InsecureGRPCURLForTesting, "", // Insecure connection for test, no token needed client.VersionInfo{ - Name: "testing program", - Version: "v1.0.0", + Name: "beat-v2-client-for-testing", + Version: version.GetDefaultVersion(), }, grpc.WithTransportCredentials(insecure.NewCredentials())) } else { // Normal Elastic-Agent-Client initialisation diff --git a/x-pack/libbeat/management/test_utils.go b/x-pack/libbeat/management/test_utils.go index b4eecac9b8d6..5f8e858636c7 100644 --- a/x-pack/libbeat/management/test_utils.go +++ b/x-pack/libbeat/management/test_utils.go @@ -9,6 +9,8 @@ import ( "github.com/elastic/elastic-agent-client/v7/pkg/proto" ) +// DoesStateMatch retkurns true if the observed state matches +// the expectedUnits and Features. func DoesStateMatch( observed *proto.CheckinObserved, expectedUnits []*proto.UnitExpected, From 78930f0c88a3988c33ba007c05bc50ecc94bc58d Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 8 May 2023 18:53:20 +0200 Subject: [PATCH 19/46] make linter happy --- x-pack/libbeat/management/test_utils.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/x-pack/libbeat/management/test_utils.go b/x-pack/libbeat/management/test_utils.go index 5f8e858636c7..6227e58870c6 100644 --- a/x-pack/libbeat/management/test_utils.go +++ b/x-pack/libbeat/management/test_utils.go @@ -33,9 +33,5 @@ func DoesStateMatch( } } - if observed.FeaturesIdx != expectedFeaturesIdx { - return false - } - - return true + return observed.FeaturesIdx == expectedFeaturesIdx } From 0049bd1fe716f1a752a749f3acf2c0e49de2f631 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 8 May 2023 20:59:02 +0200 Subject: [PATCH 20/46] try uploading test logs on failure --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 3d8f004297f9..a19ce2a11c0a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -708,6 +708,7 @@ def withBeatsEnv(Map args = [:], Closure body) { } finally { if (archive) { archiveArtifacts(allowEmptyArchive: true, artifacts: "${directory}/build/system-tests/docker-logs/TEST-docker-compose-*.log") + archiveArtifacts(allowEmptyArchive: true, artifacts: "${directory}/build//build/integration-tests/*") archiveTestOutput(directory: directory, testResults: testResults, artifacts: artifacts, id: args.id, upload: upload) } tearDown() From ea5d84f30499e02a800f475ebcf513056fa723ad Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 9 May 2023 10:36:03 +0200 Subject: [PATCH 21/46] fix archiveArtifacts path --- Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a19ce2a11c0a..1da726b38cd1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -707,8 +707,7 @@ def withBeatsEnv(Map args = [:], Closure body) { error("Error '${err.toString()}'") } finally { if (archive) { - archiveArtifacts(allowEmptyArchive: true, artifacts: "${directory}/build/system-tests/docker-logs/TEST-docker-compose-*.log") - archiveArtifacts(allowEmptyArchive: true, artifacts: "${directory}/build//build/integration-tests/*") + archiveArtifacts(allowEmptyArchive: true, artifacts: "${directory}/build/system-tests/docker-logs/TEST-docker-compose-*.log, ${directory}/build/integration-tests/**/**") archiveTestOutput(directory: directory, testResults: testResults, artifacts: artifacts, id: args.id, upload: upload) } tearDown() From f12f5e92dea0e31033fa03b7b9107928018a399f Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 9 May 2023 16:41:38 +0200 Subject: [PATCH 22/46] debugging CI --- x-pack/filebeat/tests/integration/framework_test.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index f7a1834fb645..25e8d9a7cfe1 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -9,6 +9,7 @@ package integration import ( "bufio" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -72,10 +73,20 @@ func (b *BeatProc) LogContains(s string, timeout time.Duration) bool { timer := time.NewTimer(timeout) defer timer.Stop() + linesScanned := 0 + defer func() { + b.t.Logf("lines scanned: %d", linesScanned) + pos, err := logFile.Seek(0, io.SeekCurrent) + if err != nil { + b.t.Errorf("could not seek file '%s': %s", logFile.Name(), err) + } + b.t.Logf("last position on '%s': %d", logFile.Name(), pos) + }() for { select { default: if scanner.Scan() { + linesScanned++ if strings.Contains(scanner.Text(), s) { return true } @@ -131,5 +142,7 @@ func (b *BeatProc) openLogFile() *os.File { t.Fatalf("could not open log file '%s': %s", files[0], err) } t.Cleanup(func() { f.Close() }) + + t.Logf("file: '%s' successfully opened", files[0]) return f } From cff80edfe2c3e39a2f6e6de18aacb8ca86c2a1f7 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 9 May 2023 18:30:19 +0200 Subject: [PATCH 23/46] refactor the way logs are read. --- .../tests/integration/framework_test.go | 60 ++++++++----------- .../tests/integration/input_reload_test.go | 30 ++++++++-- 2 files changed, 51 insertions(+), 39 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 25e8d9a7cfe1..99dca5a58bee 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -9,7 +9,6 @@ package integration import ( "bufio" "fmt" - "io" "os" "os/exec" "path/filepath" @@ -66,43 +65,37 @@ func (b *BeatProc) Start() { // LogContains looks for s as a sub string of every log line, // if it's not found until timeout is reached, the test fails -func (b *BeatProc) LogContains(s string, timeout time.Duration) bool { +func (b *BeatProc) LogContains(s string) bool { logFile := b.openLogFile() - scanner := bufio.NewScanner(logFile) - - timer := time.NewTimer(timeout) - defer timer.Stop() - - linesScanned := 0 defer func() { - b.t.Logf("lines scanned: %d", linesScanned) - pos, err := logFile.Seek(0, io.SeekCurrent) - if err != nil { - b.t.Errorf("could not seek file '%s': %s", logFile.Name(), err) + if err := logFile.Close(); err != nil { + // That's not quite a test error, but it can impact + // next executions of LogContains, so treat it as an error + b.t.Errorf("could not close log file: %s", err) } - b.t.Logf("last position on '%s': %d", logFile.Name(), pos) }() - for { - select { - default: - if scanner.Scan() { - linesScanned++ - if strings.Contains(scanner.Text(), s) { - return true - } - } - // scanner.Scan() will return false when it reaches the end of the file, - // then it will stop reading from the file. - // So if it's error is nil, we create a new scanner - // - // This kind of creates a busy loop, but for a test it's acceptable. - if err := scanner.Err(); err == nil { - scanner = bufio.NewScanner(logFile) - } - case <-timer.C: - b.t.Fatalf("timeout reached while trying to find '%s' in the logs", s) + scanner := bufio.NewScanner(logFile) + + // TODO(Tiago) Remove this very verbose debugging code + // startTime := time.Now() + // linesScanned := 0 + // defer func() { + // b.t.Logf("lines scanned: %d", linesScanned) + // pos, err := logFile.Seek(0, io.SeekCurrent) + // if err != nil { + // b.t.Errorf("could not seek file '%s': %s", logFile.Name(), err) + // } + // b.t.Logf("last position on '%s': %d", logFile.Name(), pos) + // b.t.Logf("took %s", time.Now().Sub(startTime).String()) + // }() + for scanner.Scan() { + // linesScanned++ + if strings.Contains(scanner.Text(), s) { + return true } } + + return false } // openLogFile opens the log file for reading and returns it. @@ -141,8 +134,7 @@ func (b *BeatProc) openLogFile() *os.File { if err != nil { t.Fatalf("could not open log file '%s': %s", files[0], err) } - t.Cleanup(func() { f.Close() }) - t.Logf("file: '%s' successfully opened", files[0]) + // t.Logf("file: '%s' successfully opened", files[0]) return f } diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 13c0cf0ac010..68f06204c8b1 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -201,11 +201,31 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { p.Start() - p.LogContains("Can only start an input when all related states are finished", 5*time.Minute) // logger: centralmgmt - p.LogContains("file 'flog.log' is not finished, will retry starting the input soon", 5*time.Minute) // logger: centralmgmt.V2-manager - p.LogContains("ForceReload set to TRUE", 5*time.Minute) // logger: centralmgmt.V2-manager - p.LogContains("Reloading Beats inputs because forceReload is true", 5*time.Minute) // logger: centralmgmt.V2-manager - p.LogContains("ForceReload set to FALSE", 5*time.Minute) // logger: centralmgmt.V2-manager + // logger: centralmgmt + // logger: centralmgmt.V2-manager + // logger: centralmgmt.V2-manager + // logger: centralmgmt.V2-manager + // logger: centralmgmt.V2-manager + + require.Eventually(t, func() bool { + return p.LogContains("Can only start an input when all related states are finished") + }, 5*time.Minute, 200*time.Millisecond) + + require.Eventually(t, func() bool { + return p.LogContains("file 'flog.log' is not finished, will retry starting the input soon") + }, 5*time.Minute, 200*time.Millisecond) + + require.Eventually(t, func() bool { + return p.LogContains("ForceReload set to TRUE") + }, 5*time.Minute, 200*time.Millisecond) + + require.Eventually(t, func() bool { + return p.LogContains("Reloading Beats inputs because forceReload is true") + }, 5*time.Minute, 200*time.Millisecond) + + require.Eventually(t, func() bool { + return p.LogContains("ForceReload set to FALSE") + }, 5*time.Minute, 200*time.Millisecond) // Set it to false, so the temporaty directory is removed testFailed = false From 04a87d04f3cfc043d1e9c49b26472699dddd979f Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 9 May 2023 19:29:04 +0200 Subject: [PATCH 24/46] More CI debugging. --- .../tests/integration/framework_test.go | 27 ++++++++++--------- .../tests/integration/input_reload_test.go | 10 +++---- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 99dca5a58bee..fd7150ab575e 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -9,6 +9,7 @@ package integration import ( "bufio" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -77,19 +78,19 @@ func (b *BeatProc) LogContains(s string) bool { scanner := bufio.NewScanner(logFile) // TODO(Tiago) Remove this very verbose debugging code - // startTime := time.Now() - // linesScanned := 0 - // defer func() { - // b.t.Logf("lines scanned: %d", linesScanned) - // pos, err := logFile.Seek(0, io.SeekCurrent) - // if err != nil { - // b.t.Errorf("could not seek file '%s': %s", logFile.Name(), err) - // } - // b.t.Logf("last position on '%s': %d", logFile.Name(), pos) - // b.t.Logf("took %s", time.Now().Sub(startTime).String()) - // }() + startTime := time.Now() + linesScanned := 0 + defer func() { + b.t.Logf("lines scanned: %d", linesScanned) + pos, err := logFile.Seek(0, io.SeekCurrent) + if err != nil { + b.t.Errorf("could not seek file '%s': %s", logFile.Name(), err) + } + b.t.Logf("last position on '%s': %d", logFile.Name(), pos) + b.t.Logf("took %s", time.Now().Sub(startTime).String()) + }() for scanner.Scan() { - // linesScanned++ + linesScanned++ if strings.Contains(scanner.Text(), s) { return true } @@ -135,6 +136,6 @@ func (b *BeatProc) openLogFile() *os.File { t.Fatalf("could not open log file '%s': %s", files[0], err) } - // t.Logf("file: '%s' successfully opened", files[0]) + t.Logf("file: '%s' successfully opened", files[0]) return f } diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 68f06204c8b1..638ebca23dbf 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -209,23 +209,23 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { require.Eventually(t, func() bool { return p.LogContains("Can only start an input when all related states are finished") - }, 5*time.Minute, 200*time.Millisecond) + }, 5*time.Minute, 10*time.Second) require.Eventually(t, func() bool { return p.LogContains("file 'flog.log' is not finished, will retry starting the input soon") - }, 5*time.Minute, 200*time.Millisecond) + }, 5*time.Minute, 10*time.Second) require.Eventually(t, func() bool { return p.LogContains("ForceReload set to TRUE") - }, 5*time.Minute, 200*time.Millisecond) + }, 5*time.Minute, 10*time.Second) require.Eventually(t, func() bool { return p.LogContains("Reloading Beats inputs because forceReload is true") - }, 5*time.Minute, 200*time.Millisecond) + }, 5*time.Minute, 10*time.Second) require.Eventually(t, func() bool { return p.LogContains("ForceReload set to FALSE") - }, 5*time.Minute, 200*time.Millisecond) + }, 5*time.Minute, 10*time.Second) // Set it to false, so the temporaty directory is removed testFailed = false From 3a299a66fa9b67707f8cace7281afab0c89afe82 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 10 May 2023 12:11:26 +0200 Subject: [PATCH 25/46] Apply suggestions from code review Co-authored-by: Blake Rouse Co-authored-by: Anderson Queiroz --- filebeat/input/log/input.go | 2 +- x-pack/filebeat/tests/integration/input_reload_test.go | 2 +- x-pack/libbeat/management/input_reload_test.go | 2 +- x-pack/libbeat/management/test_utils.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/filebeat/input/log/input.go b/filebeat/input/log/input.go index b51f644af003..4882735d36b7 100644 --- a/filebeat/input/log/input.go +++ b/filebeat/input/log/input.go @@ -94,7 +94,7 @@ func NewInput( cleanupIfNeeded := func(f func() error) { if cleanupNeeded { if err := f(); err != nil { - logp.L().Named("input.log").Errorf("clean up function retruned an error: %w", err) + logp.L().Named("input.log").Errorf("clean up function returned an error: %w", err) } } } diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 638ebca23dbf..db30e6a65c56 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -23,7 +23,7 @@ import ( // TestInputReloadUnderElasticAgent will start a Filebeat and cause the input // reload issue described on https://github.com/elastic/beats/issues/33653 -// to happen, then it will check to logs to ensure the fix is working. +// to happen, then it will check the logs to ensure the fix is working. // // In case of a test failure the directory with Filebeat logs and // all other supporting files will be kept on build/integration-tests. diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index 68001be605cd..f0b9ce99a506 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -22,7 +22,7 @@ import ( ) func TestInputReload(t *testing.T) { - // Uncomment this line to see the debug logs for this test + // Uncomment the line below to see the debug logs for this test // logp.DevelopmentSetup(logp.WithLevel(logp.DebugLevel), logp.WithSelectors("*", "centralmgmt.V2-manager")) r := reload.NewRegistry() diff --git a/x-pack/libbeat/management/test_utils.go b/x-pack/libbeat/management/test_utils.go index 6227e58870c6..fba637154b5c 100644 --- a/x-pack/libbeat/management/test_utils.go +++ b/x-pack/libbeat/management/test_utils.go @@ -9,7 +9,7 @@ import ( "github.com/elastic/elastic-agent-client/v7/pkg/proto" ) -// DoesStateMatch retkurns true if the observed state matches +// DoesStateMatch returns true if the observed state matches // the expectedUnits and Features. func DoesStateMatch( observed *proto.CheckinObserved, From 2424579f05d0443498d4c4a1f192fa297c1ca9e8 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 10 May 2023 12:27:50 +0200 Subject: [PATCH 26/46] PR review changes --- x-pack/filebeat/tests/integration/framework_test.go | 3 ++- x-pack/filebeat/tests/integration/input_reload_test.go | 10 +++++----- x-pack/libbeat/management/input_reload_test.go | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index fd7150ab575e..fdd787f0d416 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -65,7 +65,8 @@ func (b *BeatProc) Start() { } // LogContains looks for s as a sub string of every log line, -// if it's not found until timeout is reached, the test fails +// it will open the log file on every call, read it until EOF, +// then close it. func (b *BeatProc) LogContains(s string) bool { logFile := b.openLogFile() defer func() { diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index db30e6a65c56..8a53ad113aa4 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -33,9 +33,9 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { // We create our own temp dir so the files can be persisted // in case the test fails. This will help debugging issues on CI // - // testFailed will be set to 'false' as the very last thing on this test, + // testSucceeded will be set to 'true' as the very last thing on this test, // it allows us to use t.CleanUp to remove the temporary files - testFailed := true + testSucceeded := false tempDir, err := filepath.Abs(filepath.Join("../../build/integration-tests/", fmt.Sprintf("%s-%d", t.Name(), time.Now().Unix()))) if err != nil { @@ -46,7 +46,7 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { t.Fatalf("cannot create tmp dir: %#v, msg: %s", err, err.Error()) } t.Cleanup(func() { - if !testFailed { + if testSucceeded { if err := os.RemoveAll(tempDir); err != nil { t.Fatalf("could not remove temp dir '%s': %s", tempDir, err) } @@ -227,8 +227,8 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { return p.LogContains("ForceReload set to FALSE") }, 5*time.Minute, 10*time.Second) - // Set it to false, so the temporaty directory is removed - testFailed = false + // Set it to true, so the temporaty directory is removed + testSucceeded = true } func requireNewStruct(t *testing.T, v map[string]interface{}) *structpb.Struct { diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index f0b9ce99a506..6e05dc3bd4cd 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -157,7 +157,7 @@ func TestInputReload(t *testing.T) { require.NoError(t, err) defer m.Stop() - forceReloadState := []bool{false, true, false} + forceReloadStates := []bool{false, true, false} forceReloadStateIdx := 0 forceReloadLastState := true // starts on true so the first iteratiction is already a change @@ -167,13 +167,13 @@ func TestInputReload(t *testing.T) { // on state changes if forceReload != forceReloadLastState { forceReloadLastState = forceReload - if forceReload == forceReloadState[forceReloadStateIdx] { + if forceReload == forceReloadStates[forceReloadStateIdx] { // Setp to the next state forceReloadStateIdx++ } // If we went through all states, then succeed - if forceReloadStateIdx == len(forceReloadState) { + if forceReloadStateIdx == len(forceReloadStates) { // If we went through all states if configIdx == 1 { return true From 892e4ed60adfaa79fc8ad52784240666bb5f7ab2 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 10 May 2023 12:28:05 +0200 Subject: [PATCH 27/46] more debug logs for CI --- x-pack/filebeat/tests/integration/framework_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index fdd787f0d416..3de675be6e1d 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -97,6 +97,12 @@ func (b *BeatProc) LogContains(s string) bool { } } + fstat, err := logFile.Stat() + if err != nil { + b.t.Logf("cannot stat file: %s:", err) + } + b.t.Logf("[Stat] Name: %s, Size %d, ModTime: %s, Sys: %#v", fstat.Name(), fstat.Size(), fstat.ModTime().Format(time.RFC3339), fstat.Sys()) + return false } From 5f44afb140a99fc81a32af8943e5bc55a620634d Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 10 May 2023 20:30:48 +0200 Subject: [PATCH 28/46] use `bufio.Reader` instead of `bufio.Scanner` I found some log lines as big as 420kb, the Scanner could not handle it, but the reader seems to have no problems dealing with it and will be more clear if there is an error. --- .../tests/integration/framework_test.go | 31 ++++++++++++------- x-pack/libbeat/management/managerV2.go | 11 +++++-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 3de675be6e1d..3d2b87c12428 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -68,40 +68,49 @@ func (b *BeatProc) Start() { // it will open the log file on every call, read it until EOF, // then close it. func (b *BeatProc) LogContains(s string) bool { + t := b.t logFile := b.openLogFile() defer func() { if err := logFile.Close(); err != nil { // That's not quite a test error, but it can impact // next executions of LogContains, so treat it as an error - b.t.Errorf("could not close log file: %s", err) + t.Errorf("could not close log file: %s", err) } }() - scanner := bufio.NewScanner(logFile) - // TODO(Tiago) Remove this very verbose debugging code + // TODO(Tiago) Remove this very verbose debugging codex startTime := time.Now() linesScanned := 0 defer func() { - b.t.Logf("lines scanned: %d", linesScanned) + t.Logf("lines scanned: %d", linesScanned) pos, err := logFile.Seek(0, io.SeekCurrent) if err != nil { - b.t.Errorf("could not seek file '%s': %s", logFile.Name(), err) + t.Errorf("could not seek file '%s': %s", logFile.Name(), err) } - b.t.Logf("last position on '%s': %d", logFile.Name(), pos) - b.t.Logf("took %s", time.Now().Sub(startTime).String()) + t.Logf("last position on '%s': %d", logFile.Name(), pos) + t.Logf("took %s", time.Now().Sub(startTime).String()) }() - for scanner.Scan() { + + r := bufio.NewReader(logFile) + for { + line, err := r.ReadString('\n') + if err != nil { + if err != io.EOF { + t.Fatalf("error reading log file '%s': %s", logFile.Name(), err) + } + break + } linesScanned++ - if strings.Contains(scanner.Text(), s) { + if strings.Contains(line, s) { return true } } fstat, err := logFile.Stat() if err != nil { - b.t.Logf("cannot stat file: %s:", err) + t.Logf("cannot stat file: %s:", err) } - b.t.Logf("[Stat] Name: %s, Size %d, ModTime: %s, Sys: %#v", fstat.Name(), fstat.Size(), fstat.ModTime().Format(time.RFC3339), fstat.Sys()) + t.Logf("[Stat] Name: %s, Size %d, ModTime: %s, Sys: %#v", fstat.Name(), fstat.Size(), fstat.ModTime().Format(time.RFC3339), fstat.Sys()) return false } diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 907dba468c90..43e3805353bb 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -739,9 +739,14 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { return fmt.Errorf("failed to reload inputs: %w", realErrors.Err()) } } else { - // no issues while reloading inputs, set forceReload to false - cm.forceReload = false - cm.logger.Debug("ForceReload set to FALSE") + // If there was no error reloading input and forceReload was + // true, then set it to false. This prevents unecessary logging + // and make it clear this was the monent when the input reload + // finally worked. + if cm.forceReload { + cm.forceReload = false + cm.logger.Debug("ForceReload set to FALSE") + } } cm.lastInputCfgs = inputCfgs From c153d741f14a7ded4ec8354f5cf14bf7ba78c417 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 10 May 2023 21:01:06 +0200 Subject: [PATCH 29/46] typo --- x-pack/libbeat/management/managerV2.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 43e3805353bb..c04c144e3839 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -740,7 +740,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { } } else { // If there was no error reloading input and forceReload was - // true, then set it to false. This prevents unecessary logging + // true, then set it to false. This prevents unnecessary logging // and make it clear this was the monent when the input reload // finally worked. if cm.forceReload { From 12a0ba54d8e2cb212eb561ab739ce1948d8a5e50 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 11 May 2023 11:13:13 +0200 Subject: [PATCH 30/46] clean up after all fixes/debugging --- .../tests/integration/framework_test.go | 21 ------------------- .../tests/integration/input_reload_test.go | 16 +++++--------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 3d2b87c12428..503126fb8a7c 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -78,19 +78,6 @@ func (b *BeatProc) LogContains(s string) bool { } }() - // TODO(Tiago) Remove this very verbose debugging codex - startTime := time.Now() - linesScanned := 0 - defer func() { - t.Logf("lines scanned: %d", linesScanned) - pos, err := logFile.Seek(0, io.SeekCurrent) - if err != nil { - t.Errorf("could not seek file '%s': %s", logFile.Name(), err) - } - t.Logf("last position on '%s': %d", logFile.Name(), pos) - t.Logf("took %s", time.Now().Sub(startTime).String()) - }() - r := bufio.NewReader(logFile) for { line, err := r.ReadString('\n') @@ -100,18 +87,11 @@ func (b *BeatProc) LogContains(s string) bool { } break } - linesScanned++ if strings.Contains(line, s) { return true } } - fstat, err := logFile.Stat() - if err != nil { - t.Logf("cannot stat file: %s:", err) - } - t.Logf("[Stat] Name: %s, Size %d, ModTime: %s, Sys: %#v", fstat.Name(), fstat.Size(), fstat.ModTime().Format(time.RFC3339), fstat.Sys()) - return false } @@ -152,6 +132,5 @@ func (b *BeatProc) openLogFile() *os.File { t.Fatalf("could not open log file '%s': %s", files[0], err) } - t.Logf("file: '%s' successfully opened", files[0]) return f } diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 8a53ad113aa4..53dce710e76f 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -201,31 +201,25 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { p.Start() - // logger: centralmgmt - // logger: centralmgmt.V2-manager - // logger: centralmgmt.V2-manager - // logger: centralmgmt.V2-manager - // logger: centralmgmt.V2-manager - require.Eventually(t, func() bool { return p.LogContains("Can only start an input when all related states are finished") - }, 5*time.Minute, 10*time.Second) + }, 5*time.Minute, 100*time.Millisecond) require.Eventually(t, func() bool { return p.LogContains("file 'flog.log' is not finished, will retry starting the input soon") - }, 5*time.Minute, 10*time.Second) + }, 5*time.Minute, 100*time.Millisecond) require.Eventually(t, func() bool { return p.LogContains("ForceReload set to TRUE") - }, 5*time.Minute, 10*time.Second) + }, 5*time.Minute, 100*time.Millisecond) require.Eventually(t, func() bool { return p.LogContains("Reloading Beats inputs because forceReload is true") - }, 5*time.Minute, 10*time.Second) + }, 5*time.Minute, 100*time.Millisecond) require.Eventually(t, func() bool { return p.LogContains("ForceReload set to FALSE") - }, 5*time.Minute, 10*time.Second) + }, 5*time.Minute, 100*time.Millisecond) // Set it to true, so the temporaty directory is removed testSucceeded = true From a67753097644af928d77a1fb57d681829cba76d2 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 11 May 2023 11:38:14 +0200 Subject: [PATCH 31/46] Final PR improvements. --- CHANGELOG.next.asciidoc | 2 +- .../tests/integration/framework_test.go | 10 +-- .../tests/integration/input_reload_test.go | 67 +++++++++++++------ 3 files changed, 52 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index e0014f1e5856..253ca6d2650e 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -119,7 +119,7 @@ https://github.com/elastic/beats/compare/v8.7.1\...main[Check the HEAD diff] - Sanitize filenames for request tracer in cel input. {pull}35154[35154] - Fix accidental error overwrite in defer statement in entityanalytics Azure AD input. {issue}35153[35153] {pull}35169[35169] - Fixing the grok expression outputs of log files {pull}35221[35221] -- Fixes "Can only start an input when all related states are finished" when running under Elastic-Agent {pull}35250[35250] {issue}33653[33653] +- Fixes "Can only start an input when all related states are finished" error when running under Elastic-Agent {pull}35250[35250] {issue}33653[33653] - Move repeated Windows event channel not found errors in winlog input to debug level. {issue}35314[35314] {pull}35317[35317] - Fix crash when processing forwarded logs missing a message. {issue}34705[34705] {pull}34865[34865] diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 503126fb8a7c..7bd4c42e17da 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -46,20 +46,22 @@ func NewBeat(t *testing.T, binary string, args []string, tempDir string) BeatPro return p } +// Start starts the Beat process func (b *BeatProc) Start() { + t := b.t fullPath, err := filepath.Abs(b.Binary) if err != nil { - b.t.Fatalf("could got get full path from %q, err: %s", b.Binary, err) + t.Fatalf("could got get full path from %q, err: %s", b.Binary, err) } b.Cmd = exec.Command(fullPath, b.Args...) if err := b.Cmd.Start(); err != nil { - b.t.Fatalf("could not start process: %s", err) + t.Fatalf("could not start process: %s", err) } - b.t.Cleanup(func() { + t.Cleanup(func() { pid := b.Cmd.Process.Pid if err := b.Cmd.Process.Kill(); err != nil { - b.t.Fatalf("could not stop process with PID: %d, err: %s", pid, err) + t.Fatalf("could not stop process with PID: %d, err: %s", pid, err) } }) } diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 53dce710e76f..57809561a6e1 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -31,7 +31,8 @@ import ( // Run the tests wit -v flag to print the temporary folder used. func TestInputReloadUnderElasticAgent(t *testing.T) { // We create our own temp dir so the files can be persisted - // in case the test fails. This will help debugging issues on CI + // in case the test fails. This will help debugging issues + // locally and on CI. // // testSucceeded will be set to 'true' as the very last thing on this test, // it allows us to use t.CleanUp to remove the temporary files @@ -43,8 +44,9 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { } if err := os.MkdirAll(tempDir, 0766); err != nil { - t.Fatalf("cannot create tmp dir: %#v, msg: %s", err, err.Error()) + t.Fatalf("cannot create tmp dir: %s, msg: %s", err, err.Error()) } + t.Logf("Temporary directory: %s", tempDir) t.Cleanup(func() { if testSucceeded { if err := os.RemoveAll(tempDir); err != nil { @@ -54,8 +56,6 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { } }) - t.Logf("Temporary directory: %s", tempDir) - logFilePath := filepath.Join(tempDir, "flog.log") generateLogFile(t, logFilePath) var units = [][]*proto.UnitExpected{ @@ -151,6 +151,12 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { }, } + // Once the desired state is reached (aka Filebeat finished applying + // the policy changes) we still want for a little bit before sending + // another policy. This will allow the input to run and get some data + // into the publishing pipeline.j + // + // nextState is a helper function to handle this delay. idx := 0 waiting := false when := time.Now() @@ -167,6 +173,18 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { when = time.Now().Add(10 * time.Second) } server := &mock.StubServerV2{ + // The Beat will call the checkin function multiple times: + // - At least once at startup + // - At every state change (starting, configuring, healthy, etc) + // for every Unit. + // + // Because of that we can't rely on the number of times it is called + // we need some sort of state machine to handle when to send the next + // policy and when to just re-send the current one. + // + // If the Elastic-Agent wants the Beat to keep running the same policy, + // it will just keep re-sending it every time the Beat calls the checkin + // method. CheckinV2Impl: func(observed *proto.CheckinObserved) *proto.CheckinExpected { if management.DoesStateMatch(observed, units[idx], 0) { nextState() @@ -180,46 +198,49 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { Units: units[idx], } }, - ActionImpl: func(response *proto.ActionResponse) error { - return nil - }, - ActionsChan: make(chan *mock.PerformAction, 100), + ActionImpl: func(response *proto.ActionResponse) error { return nil }, + // ActionsChan: make(chan *mock.PerformAction, 100), } require.NoError(t, server.Start()) - defer server.Stop() + t.Cleanup(server.Stop) - p := NewBeat( + filebeat := NewBeat( t, "../../filebeat.test", []string{ - "-E", fmt.Sprintf("management.insecure_grpc_url_for_testing=\"localhost:%d\"", server.Port), + "-E", fmt.Sprintf(`management.insecure_grpc_url_for_testing="localhost:%d"`, server.Port), "-E", "management.enabled=true", }, tempDir, ) - p.Start() + filebeat.Start() require.Eventually(t, func() bool { - return p.LogContains("Can only start an input when all related states are finished") - }, 5*time.Minute, 100*time.Millisecond) + return filebeat.LogContains("Can only start an input when all related states are finished") + }, 5*time.Minute, 100*time.Millisecond, + "String 'Can only start an input when all related states are finished' not found on Filebeat logs") require.Eventually(t, func() bool { - return p.LogContains("file 'flog.log' is not finished, will retry starting the input soon") - }, 5*time.Minute, 100*time.Millisecond) + return filebeat.LogContains("file 'flog.log' is not finished, will retry starting the input soon") + }, 5*time.Minute, 100*time.Millisecond, + "String 'file 'flog.log' is not finished, will retry starting the input soon' not found on Filebeat logs") require.Eventually(t, func() bool { - return p.LogContains("ForceReload set to TRUE") - }, 5*time.Minute, 100*time.Millisecond) + return filebeat.LogContains("ForceReload set to TRUE") + }, 5*time.Minute, 100*time.Millisecond, + "String 'ForceReload set to TRUE' not found on Filebeat logs") require.Eventually(t, func() bool { - return p.LogContains("Reloading Beats inputs because forceReload is true") - }, 5*time.Minute, 100*time.Millisecond) + return filebeat.LogContains("Reloading Beats inputs because forceReload is true") + }, 5*time.Minute, 100*time.Millisecond, + "String 'Reloading Beats inputs because forceReload is true' not found on Filebeat logs") require.Eventually(t, func() bool { - return p.LogContains("ForceReload set to FALSE") - }, 5*time.Minute, 100*time.Millisecond) + return filebeat.LogContains("ForceReload set to FALSE") + }, 5*time.Minute, 100*time.Millisecond, + "String 'ForceReload set to FALSE' not found on Filebeat logs") // Set it to true, so the temporaty directory is removed testSucceeded = true @@ -233,6 +254,8 @@ func requireNewStruct(t *testing.T, v map[string]interface{}) *structpb.Struct { return str } +// generateLogFile generates a log file by appending the current +// time to it every second. func generateLogFile(t *testing.T, fullPath string) { t.Helper() f, err := os.Create(fullPath) From 1ffe0b6ab2eeca92dbc15c819d2cc8f4b87eaa3d Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 12 May 2023 16:26:40 +0200 Subject: [PATCH 32/46] Apply suggestions from code review Co-authored-by: Anderson Queiroz --- x-pack/filebeat/tests/integration/input_reload_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 57809561a6e1..a999b92fe9e5 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -152,9 +152,9 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { } // Once the desired state is reached (aka Filebeat finished applying - // the policy changes) we still want for a little bit before sending + // the policy changes) we still wait for a little bit before sending // another policy. This will allow the input to run and get some data - // into the publishing pipeline.j + // into the publishing pipeline. // // nextState is a helper function to handle this delay. idx := 0 From dc147b1cc8a7ef39e93d6dc67735492706a82802 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 12 May 2023 16:28:45 +0200 Subject: [PATCH 33/46] Update x-pack/filebeat/tests/integration/framework_test.go Co-authored-by: Anderson Queiroz --- x-pack/filebeat/tests/integration/framework_test.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 7bd4c42e17da..3b2bff2c858a 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -113,11 +113,7 @@ func (b *BeatProc) openLogFile() *os.File { if err != nil { t.Fatalf("could not expand log file glob: %s", err) } - if len(files) == 1 { - return true - } - - return false + return len(files) == 1 }, 5*time.Second, 100*time.Millisecond, "waiting for log file matching glob '%s' to be created", glob) From 9ea251ad5db96219b1e83fa808e6d5d9996522b5 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 12 May 2023 16:28:54 +0200 Subject: [PATCH 34/46] Update x-pack/filebeat/tests/integration/framework_test.go Co-authored-by: Anderson Queiroz --- x-pack/filebeat/tests/integration/framework_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 3b2bff2c858a..23e23c780a1c 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -30,7 +30,7 @@ type BeatProc struct { // NewBeat createa a new Beat process from the system tests binary. // It sets some requried options like the home path, logging, etc. -func NewBeat(t *testing.T, binary string, args []string, tempDir string) BeatProc { +func NewBeat(t *testing.T, binary string, tempDir string, args ...string) BeatProc { p := BeatProc{ t: t, Binary: binary, From 4c2fcd7933429f3ac1c16d7953b2e76d1837e914 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 12 May 2023 16:27:16 +0200 Subject: [PATCH 35/46] PR improvements --- .../tests/integration/input_reload_test.go | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index a999b92fe9e5..024a2a46aed9 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -156,7 +156,9 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { // another policy. This will allow the input to run and get some data // into the publishing pipeline. // - // nextState is a helper function to handle this delay. + // nextState is a helper function that will keep cycling through both + // elements of the `units` slice. Once one is fully applied, we wait + // at least 10s then send the next one. idx := 0 waiting := false when := time.Now() @@ -217,29 +219,49 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { filebeat.Start() + // waitDeadlineOr5Mins looks at the test deadline + // and returns a reasonable value of waiting for a + // condition to be met. The possible values are: + // - if no test deadline is set, return 5 minuets + // - if a deadline is set and there is less than + // 0.5 second left, return the time left + // - otherwise return the time left minus 0.5 second. + waitDeadlineOr5Min := func() time.Duration { + deadline, deadileSet := t.Deadline() + if deadileSet { + left := deadline.Sub(time.Now()) + final := left - 500*time.Millisecond + if final <= 0 { + return left + } + return final + } + return 5 * time.Minute + } + require.Eventually(t, func() bool { return filebeat.LogContains("Can only start an input when all related states are finished") - }, 5*time.Minute, 100*time.Millisecond, + }, waitDeadlineOr5Min(), 100*time.Millisecond, "String 'Can only start an input when all related states are finished' not found on Filebeat logs") require.Eventually(t, func() bool { return filebeat.LogContains("file 'flog.log' is not finished, will retry starting the input soon") - }, 5*time.Minute, 100*time.Millisecond, + }, waitDeadlineOr5Min(), 100*time.Millisecond, "String 'file 'flog.log' is not finished, will retry starting the input soon' not found on Filebeat logs") require.Eventually(t, func() bool { return filebeat.LogContains("ForceReload set to TRUE") - }, 5*time.Minute, 100*time.Millisecond, + }, waitDeadlineOr5Min(), 100*time.Millisecond, "String 'ForceReload set to TRUE' not found on Filebeat logs") require.Eventually(t, func() bool { return filebeat.LogContains("Reloading Beats inputs because forceReload is true") - }, 5*time.Minute, 100*time.Millisecond, + }, waitDeadlineOr5Min(), 100*time.Millisecond, "String 'Reloading Beats inputs because forceReload is true' not found on Filebeat logs") require.Eventually(t, func() bool { return filebeat.LogContains("ForceReload set to FALSE") - }, 5*time.Minute, 100*time.Millisecond, + }, waitDeadlineOr5Min(), 100*time.Millisecond, "String 'ForceReload set to FALSE' not found on Filebeat logs") // Set it to true, so the temporaty directory is removed From f1bf065ffbf18b28e756101034af4e8ab6d7b66a Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Fri, 12 May 2023 16:51:29 +0200 Subject: [PATCH 36/46] PR improvements --- .../tests/integration/framework_test.go | 2 ++ .../tests/integration/input_reload_test.go | 36 ++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index 23e23c780a1c..dc5adb239982 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -30,6 +30,8 @@ type BeatProc struct { // NewBeat createa a new Beat process from the system tests binary. // It sets some requried options like the home path, logging, etc. +// `tempDir` will be used a home a logs directory for the Beat +// `args` will be passed as CLI arguments to the Beat func NewBeat(t *testing.T, binary string, tempDir string, args ...string) BeatProc { p := BeatProc{ t: t, diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 024a2a46aed9..1e2f39ee53b2 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -7,7 +7,9 @@ package integration import ( + "context" "fmt" + "net/http" "os" "path/filepath" "testing" @@ -30,6 +32,11 @@ import ( // // Run the tests wit -v flag to print the temporary folder used. func TestInputReloadUnderElasticAgent(t *testing.T) { + // First things first, ensure ES is running and we can connect to it. + // If ES is not running, the test will timeout and the only way to know + // what caused is to go through Filebeat's logs + ensureESIsRunning(t) + // We create our own temp dir so the files can be persisted // in case the test fails. This will help debugging issues // locally and on CI. @@ -210,11 +217,9 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { filebeat := NewBeat( t, "../../filebeat.test", - []string{ - "-E", fmt.Sprintf(`management.insecure_grpc_url_for_testing="localhost:%d"`, server.Port), - "-E", "management.enabled=true", - }, tempDir, + "-E", fmt.Sprintf(`management.insecure_grpc_url_for_testing="localhost:%d"`, server.Port), + "-E", "management.enabled=true", ) filebeat.Start() @@ -306,3 +311,26 @@ func generateLogFile(t *testing.T, fullPath string) { } }() } + +func ensureESIsRunning(t *testing.T) { + t.Helper() + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(500*time.Second)) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost:9200", nil) + if err != nil { + t.Fatalf("cannot create request to ensure ES is running: %s", err) + } + + req.SetBasicAuth("admin", "testing") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + // If you're reading this message, you probably forgot to start ES + // run `mage compose:Up` from Filebeat's folder to start all + // containers required for integration tests + t.Fatalf("cannot execute HTTP request to ES: %s", err) + } + if resp.StatusCode != http.StatusOK { + t.Errorf("unexpected HTTP status: %d, expecting 200 - OK", resp.StatusCode) + } +} From 0f67fb2162da091def53dee6e13e560967179268 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 17 May 2023 10:43:14 +0200 Subject: [PATCH 37/46] Apply suggestions from code review Co-authored-by: Denis Co-authored-by: Anderson Queiroz --- x-pack/filebeat/tests/integration/framework_test.go | 8 ++++---- x-pack/filebeat/tests/integration/input_reload_test.go | 9 ++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index dc5adb239982..f81da3397a44 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -29,8 +29,8 @@ type BeatProc struct { } // NewBeat createa a new Beat process from the system tests binary. -// It sets some requried options like the home path, logging, etc. -// `tempDir` will be used a home a logs directory for the Beat +// It sets some required options like the home path, logging, etc. +// `tempDir` will be used as a home and a logs directory for the Beat // `args` will be passed as CLI arguments to the Beat func NewBeat(t *testing.T, binary string, tempDir string, args ...string) BeatProc { p := BeatProc{ @@ -53,7 +53,7 @@ func (b *BeatProc) Start() { t := b.t fullPath, err := filepath.Abs(b.Binary) if err != nil { - t.Fatalf("could got get full path from %q, err: %s", b.Binary, err) + t.Fatalf("could not get full path from %q, err: %s", b.Binary, err) } b.Cmd = exec.Command(fullPath, b.Args...) @@ -68,7 +68,7 @@ func (b *BeatProc) Start() { }) } -// LogContains looks for s as a sub string of every log line, +// LogContains looks for `s` as a substring of every log line, // it will open the log file on every call, read it until EOF, // then close it. func (b *BeatProc) LogContains(s string) bool { diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 1e2f39ee53b2..2d62bb81716c 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -59,7 +59,7 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { if err := os.RemoveAll(tempDir); err != nil { t.Fatalf("could not remove temp dir '%s': %s", tempDir, err) } - t.Logf("Temprary directory '%s' removed", tempDir) + t.Logf("Temporary directory '%s' removed", tempDir) } }) @@ -192,7 +192,7 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { // policy and when to just re-send the current one. // // If the Elastic-Agent wants the Beat to keep running the same policy, - // it will just keep re-sending it every time the Beat calls the checkin + // it will just keep re-sending it every time the Beat calls the check-in // method. CheckinV2Impl: func(observed *proto.CheckinObserved) *proto.CheckinExpected { if management.DoesStateMatch(observed, units[idx], 0) { @@ -208,7 +208,6 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { } }, ActionImpl: func(response *proto.ActionResponse) error { return nil }, - // ActionsChan: make(chan *mock.PerformAction, 100), } require.NoError(t, server.Start()) @@ -227,7 +226,7 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { // waitDeadlineOr5Mins looks at the test deadline // and returns a reasonable value of waiting for a // condition to be met. The possible values are: - // - if no test deadline is set, return 5 minuets + // - if no test deadline is set, return 5 minutes // - if a deadline is set and there is less than // 0.5 second left, return the time left // - otherwise return the time left minus 0.5 second. @@ -269,7 +268,7 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { }, waitDeadlineOr5Min(), 100*time.Millisecond, "String 'ForceReload set to FALSE' not found on Filebeat logs") - // Set it to true, so the temporaty directory is removed + // Set it to true, so the temporary directory is removed testSucceeded = true } From c6a74e9b512cd8feaebed7499b969dd8836f3f79 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 17 May 2023 10:50:02 +0200 Subject: [PATCH 38/46] fix typos --- x-pack/filebeat/tests/integration/framework_test.go | 2 +- x-pack/filebeat/tests/integration/input_reload_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/tests/integration/framework_test.go b/x-pack/filebeat/tests/integration/framework_test.go index f81da3397a44..1a65eb96da8e 100644 --- a/x-pack/filebeat/tests/integration/framework_test.go +++ b/x-pack/filebeat/tests/integration/framework_test.go @@ -30,7 +30,7 @@ type BeatProc struct { // NewBeat createa a new Beat process from the system tests binary. // It sets some required options like the home path, logging, etc. -// `tempDir` will be used as a home and a logs directory for the Beat +// `tempDir` will be used as home and logs directory for the Beat // `args` will be passed as CLI arguments to the Beat func NewBeat(t *testing.T, binary string, tempDir string, args ...string) BeatProc { p := BeatProc{ diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 2d62bb81716c..ab930d6c2a17 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -34,7 +34,7 @@ import ( func TestInputReloadUnderElasticAgent(t *testing.T) { // First things first, ensure ES is running and we can connect to it. // If ES is not running, the test will timeout and the only way to know - // what caused is to go through Filebeat's logs + // what caused it is going through Filebeat's logs. ensureESIsRunning(t) // We create our own temp dir so the files can be persisted From efd1811910ab48161f82e684a7091e3aa913c0c7 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 17 May 2023 10:55:33 +0200 Subject: [PATCH 39/46] improve comments --- .../filebeat/tests/integration/input_reload_test.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index ab930d6c2a17..bbed11c90ec4 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -24,8 +24,14 @@ import ( ) // TestInputReloadUnderElasticAgent will start a Filebeat and cause the input -// reload issue described on https://github.com/elastic/beats/issues/33653 -// to happen, then it will check the logs to ensure the fix is working. +// reload issue described on https://github.com/elastic/beats/issues/33653. +// In short, a new input for a file needs to be started while there are still +// events from that file in the publishing pipeline, effectively keeping +// the harvester status as `finished: false`, which prevents the new input +// from starting. +// +// This tests ensures Filebeat can gracefully recover from this situation +// and will eventually re-start harvesting the file. // // In case of a test failure the directory with Filebeat logs and // all other supporting files will be kept on build/integration-tests. @@ -182,7 +188,7 @@ func TestInputReloadUnderElasticAgent(t *testing.T) { when = time.Now().Add(10 * time.Second) } server := &mock.StubServerV2{ - // The Beat will call the checkin function multiple times: + // The Beat will call the check-in function multiple times: // - At least once at startup // - At every state change (starting, configuring, healthy, etc) // for every Unit. From 49f314d79f2637a4a34e7f06a9ee82f5d858af11 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 17 May 2023 12:49:49 +0200 Subject: [PATCH 40/46] stop logging gorourine when test finishes --- .../tests/integration/input_reload_test.go | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index bbed11c90ec4..eebfa533b7c8 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -298,20 +298,33 @@ func generateLogFile(t *testing.T, fullPath string) { go func() { t.Helper() ticker := time.NewTicker(time.Second) - defer ticker.Stop() - defer f.Close() + t.Cleanup(ticker.Stop) + + done := make(chan struct{}) + t.Cleanup(func() { close(done) }) + + defer func() { + if err := f.Close(); err != nil { + t.Errorf("could not close log file '%s': %s", fullPath, err) + } + }() + for { - now := <-ticker.C - _, err := fmt.Fprintln(f, now.Format(time.RFC3339)) - if err != nil { - // The Go compiler does not allow me to call t.Fatalf from a non-test - // goroutine, so just log it instead - t.Errorf("could not write data to log file '%s': %s", fullPath, err) + select { + case <-done: return - } - // make sure log lines are synced as quickly as possible - if err := f.Sync(); err != nil { - t.Errorf("could not sync file '%s': %s", fullPath, err) + case now := <-ticker.C: + _, err := fmt.Fprintln(f, now.Format(time.RFC3339)) + if err != nil { + // The Go compiler does not allow me to call t.Fatalf from a non-test + // goroutine, so just log it instead + t.Errorf("could not write data to log file '%s': %s", fullPath, err) + return + } + // make sure log lines are synced as quickly as possible + if err := f.Sync(); err != nil { + t.Errorf("could not sync file '%s': %s", fullPath, err) + } } } }() From e9ce324ce15b65a343a40c4b9b5a1b65c473612f Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 17 May 2023 12:56:10 +0200 Subject: [PATCH 41/46] Apply suggestions from code review Co-authored-by: Denis --- x-pack/libbeat/management/input_reload_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index 6e05dc3bd4cd..474461e9868c 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -159,7 +159,7 @@ func TestInputReload(t *testing.T) { forceReloadStates := []bool{false, true, false} forceReloadStateIdx := 0 - forceReloadLastState := true // starts on true so the first iteratiction is already a change + forceReloadLastState := true // starts on true so the first iteration is already a change eventuallyCheck := func() bool { forceReload := mm.forceReload @@ -168,7 +168,7 @@ func TestInputReload(t *testing.T) { if forceReload != forceReloadLastState { forceReloadLastState = forceReload if forceReload == forceReloadStates[forceReloadStateIdx] { - // Setp to the next state + // Set to the next state forceReloadStateIdx++ } From d36f48672653082fd5a8e27795726fb87c733be2 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 17 May 2023 12:56:48 +0200 Subject: [PATCH 42/46] Update x-pack/libbeat/management/managerV2.go Co-authored-by: Denis --- x-pack/libbeat/management/managerV2.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index c04c144e3839..0cd4a640261f 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -741,7 +741,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { } else { // If there was no error reloading input and forceReload was // true, then set it to false. This prevents unnecessary logging - // and make it clear this was the monent when the input reload + // and makes it clear this was the moment when the input reload // finally worked. if cm.forceReload { cm.forceReload = false From a98f0a9cef9c412437ffd29d6069be49e23394bc Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 30 May 2023 18:04:55 +0200 Subject: [PATCH 43/46] use env vars with defaults for credentials --- .../filebeat/tests/integration/input_reload_test.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index eebfa533b7c8..96b2fc123df2 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -339,7 +339,17 @@ func ensureESIsRunning(t *testing.T) { t.Fatalf("cannot create request to ensure ES is running: %s", err) } - req.SetBasicAuth("admin", "testing") + user := os.Getenv("ES_USER") + if user == "" { + user = "admin" + } + + pass := os.Getenv("ES_PASS") + if pass == "" { + pass = "testing" + } + + req.SetBasicAuth(user, pass) resp, err := http.DefaultClient.Do(req) if err != nil { From 8eece56eceeb8fde9616fef2e653333fdac91851 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 30 May 2023 18:49:49 +0200 Subject: [PATCH 44/46] replace uses of `Cause` by `errors.Unwrap` --- .../libbeat/management/input_reload_test.go | 16 ++-------- x-pack/libbeat/management/managerV2.go | 31 +------------------ 2 files changed, 3 insertions(+), 44 deletions(-) diff --git a/x-pack/libbeat/management/input_reload_test.go b/x-pack/libbeat/management/input_reload_test.go index 474461e9868c..972c42908f11 100644 --- a/x-pack/libbeat/management/input_reload_test.go +++ b/x-pack/libbeat/management/input_reload_test.go @@ -34,10 +34,10 @@ func TestInputReload(t *testing.T) { ReloadImpl: func(configs []*reload.ConfigWithMeta) error { reloadCallCount++ if reloadCallCount == 1 { - e1 := multierror.Errors{withCause{&common.ErrInputNotFinished{ + e1 := multierror.Errors{fmt.Errorf("%w", &common.ErrInputNotFinished{ State: "", File: "/tmp/foo.log", - }}} + })} return e1.Err() } @@ -205,15 +205,3 @@ func (r *reloadableListMock) Configs() []*reload.ConfigWithMeta { defer r.mx.Unlock() return r.configs } - -type withCause struct { - Err error -} - -func (w withCause) Cause() error { - return w.Err -} - -func (w withCause) Error() string { - return w.Err.Error() -} diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 0cd4a640261f..6b803a1ddcb9 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -716,7 +716,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { // the standard library errors package. if errors.As(err, &merror) { for _, err := range merror.Errors { - causeErr := cause(err) + causeErr := errors.Unwrap(err) // A Log input is only marked as finished when all events it // produceds are acked by the acker so when we see this error, // we just retry until the new input can be started. @@ -871,32 +871,3 @@ func didChange(previous map[string]*proto.UnitExpectedConfig, latest map[string] } return false } - -// cause returns the underlying cause of the error, if possible. -// An error value has a cause if it implements the following -// interface: -// -// type causer interface { -// Cause() error -// } -// -// If the error does not implement Cause, the original error will -// be returned. If the error is nil, nil will be returned without further -// investigation. -// Source: https://github.com/pkg/errors/blob/614d223910a179a466c1767a985424175c39b465/errors.go#L264-L288 -func cause(err error) error { - type causer interface { - Cause() error - } - - for err != nil { - //nolint: errorlint // This is the original github.com/pkg/errors - // implementation. - cause, ok := err.(causer) - if !ok { - break - } - err = cause.Cause() - } - return err -} From 844194a0dc5adec7f5407caca2bb0fadd7fab1d9 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 31 May 2023 12:12:39 +0200 Subject: [PATCH 45/46] Update x-pack/filebeat/tests/integration/input_reload_test.go Co-authored-by: Denis --- x-pack/filebeat/tests/integration/input_reload_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/filebeat/tests/integration/input_reload_test.go b/x-pack/filebeat/tests/integration/input_reload_test.go index 96b2fc123df2..cab3b273a9bf 100644 --- a/x-pack/filebeat/tests/integration/input_reload_test.go +++ b/x-pack/filebeat/tests/integration/input_reload_test.go @@ -36,7 +36,7 @@ import ( // In case of a test failure the directory with Filebeat logs and // all other supporting files will be kept on build/integration-tests. // -// Run the tests wit -v flag to print the temporary folder used. +// Run the tests with -v flag to print the temporary folder used. func TestInputReloadUnderElasticAgent(t *testing.T) { // First things first, ensure ES is running and we can connect to it. // If ES is not running, the test will timeout and the only way to know From a37a27599562506bbc566183a4eb13565b6b58f4 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 31 May 2023 12:13:00 +0200 Subject: [PATCH 46/46] Update x-pack/libbeat/management/managerV2.go Co-authored-by: Denis --- x-pack/libbeat/management/managerV2.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/libbeat/management/managerV2.go b/x-pack/libbeat/management/managerV2.go index 6b803a1ddcb9..70db3fd88b08 100644 --- a/x-pack/libbeat/management/managerV2.go +++ b/x-pack/libbeat/management/managerV2.go @@ -718,7 +718,7 @@ func (cm *BeatV2Manager) reloadInputs(inputUnits []*client.Unit) error { for _, err := range merror.Errors { causeErr := errors.Unwrap(err) // A Log input is only marked as finished when all events it - // produceds are acked by the acker so when we see this error, + // produced are acked by the acker so when we see this error, // we just retry until the new input can be started. // This is the same logic used by the standalone configuration file // reloader implemented on libbeat/cfgfile/reload.go