From 371871e5d7f867a7b9b432af5e901e882a4d8082 Mon Sep 17 00:00:00 2001 From: Blake Rouse Date: Wed, 28 Apr 2021 12:31:17 -0400 Subject: [PATCH 1/4] Delay the restart of application when a status report of failure is given (#25339) * Delay the restart of application when a status report of failure is given. * Add changelog. * Fix test and make it configurable. * Run mage check --- x-pack/elastic-agent/CHANGELOG.next.asciidoc | 2 + .../elastic-agent/pkg/agent/cmd/enroll_cmd.go | 7 -- .../pkg/agent/operation/common_test.go | 4 +- .../pkg/core/plugin/process/app.go | 4 +- .../pkg/core/plugin/process/status.go | 77 ++++++++++++++++--- .../elastic-agent/pkg/core/process/config.go | 10 ++- 6 files changed, 80 insertions(+), 24 deletions(-) diff --git a/x-pack/elastic-agent/CHANGELOG.next.asciidoc b/x-pack/elastic-agent/CHANGELOG.next.asciidoc index 403f0a106c6..2ff301db695 100644 --- a/x-pack/elastic-agent/CHANGELOG.next.asciidoc +++ b/x-pack/elastic-agent/CHANGELOG.next.asciidoc @@ -55,8 +55,10 @@ - Fixed: limit for retries to Kibana configurable {issue}25063[25063] - Fix issue with status and inspect inside of container {pull}25204[25204] - Remove FLEET_SERVER_POLICY_NAME env variable as it was not used {pull}25149[25149] +- Reduce log level for listener cleanup to debug {pull}25274 - Passing in policy id to container command works {pull}25352[25352] - Reduce log level for listener cleanup to debug {pull}25274[25274] +- Delay the restart of application when a status report of failure is given {pull}25339[25339] ==== New features diff --git a/x-pack/elastic-agent/pkg/agent/cmd/enroll_cmd.go b/x-pack/elastic-agent/pkg/agent/cmd/enroll_cmd.go index dc5e4753445..f872f71dafe 100644 --- a/x-pack/elastic-agent/pkg/agent/cmd/enroll_cmd.go +++ b/x-pack/elastic-agent/pkg/agent/cmd/enroll_cmd.go @@ -613,13 +613,6 @@ func waitForFleetServer(ctx context.Context, agentSubproc <-chan *os.ProcessStat } resChan <- waitResult{enrollmentToken: token} break - } else if app.Status == proto.Status_FAILED { - // app completely failed; exit now - if app.Message != "" { - log.Infof("Fleet Server - %s", app.Message) - } - resChan <- waitResult{err: errors.New(app.Message)} - break } if app.Message != "" { appMsg := fmt.Sprintf("Fleet Server - %s", app.Message) diff --git a/x-pack/elastic-agent/pkg/agent/operation/common_test.go b/x-pack/elastic-agent/pkg/agent/operation/common_test.go index 505eadc8d08..7ebcb085555 100644 --- a/x-pack/elastic-agent/pkg/agent/operation/common_test.go +++ b/x-pack/elastic-agent/pkg/agent/operation/common_test.go @@ -41,7 +41,9 @@ func getTestOperator(t *testing.T, downloadPath string, installPath string, p *a Delay: 3 * time.Second, MaxDelay: 10 * time.Second, }, - ProcessConfig: &process.Config{}, + ProcessConfig: &process.Config{ + FailureTimeout: 1, // restart instantly + }, DownloadConfig: &artifact.Config{ TargetDirectory: downloadPath, InstallPath: installPath, diff --git a/x-pack/elastic-agent/pkg/core/plugin/process/app.go b/x-pack/elastic-agent/pkg/core/plugin/process/app.go index 9f52f74ce38..c0c6341cbd3 100644 --- a/x-pack/elastic-agent/pkg/core/plugin/process/app.go +++ b/x-pack/elastic-agent/pkg/core/plugin/process/app.go @@ -58,7 +58,9 @@ type Application struct { logger *logger.Logger - appLock sync.Mutex + appLock sync.Mutex + restartCanceller context.CancelFunc + restartConfig map[string]interface{} } // ArgsDecorator decorates arguments before calling an application diff --git a/x-pack/elastic-agent/pkg/core/plugin/process/status.go b/x-pack/elastic-agent/pkg/core/plugin/process/status.go index 21ded667101..6838c0a18d4 100644 --- a/x-pack/elastic-agent/pkg/core/plugin/process/status.go +++ b/x-pack/elastic-agent/pkg/core/plugin/process/status.go @@ -5,7 +5,9 @@ package process import ( + "context" "fmt" + "time" "gopkg.in/yaml.v2" @@ -35,21 +37,74 @@ func (a *Application) OnStatusChange(s *server.ApplicationState, status proto.St return } - // kill the process - if a.state.ProcessInfo != nil { - _ = a.state.ProcessInfo.Process.Kill() - a.state.ProcessInfo = nil - } - ctx := a.startContext - tag := a.tag - // it was marshalled to pass into the state, so unmarshall will always succeed var cfg map[string]interface{} _ = yaml.Unmarshal([]byte(s.Config()), &cfg) - err := a.start(ctx, tag, cfg) - if err != nil { - a.setState(state.Crashed, fmt.Sprintf("failed to restart: %s", err), nil) + // start the failed timer + a.startFailedTimer(cfg) + } else { + a.stopFailedTimer() + } +} + +// startFailedTimer starts a timer that will restart the application if it doesn't exit failed after a period of time. +// +// This does not grab the appLock, that must be managed by the caller. +func (a *Application) startFailedTimer(cfg map[string]interface{}) { + if a.restartCanceller != nil { + // already have running failed timer; just update config + a.restartConfig = cfg + return + } + + ctx, cancel := context.WithCancel(a.startContext) + a.restartCanceller = cancel + a.restartConfig = cfg + t := time.NewTimer(a.processConfig.FailureTimeout) + go func() { + defer func() { + a.appLock.Lock() + a.restartCanceller = nil + a.restartConfig = nil + a.appLock.Unlock() + }() + + select { + case <-ctx.Done(): + return + case <-t.C: + a.restart(a.restartConfig) } + }() +} + +// stopFailedTimer stops the timer that would restart the application from reporting failure. +// +// This does not grab the appLock, that must be managed by the caller. +func (a *Application) stopFailedTimer() { + if a.restartCanceller == nil { + return + } + a.restartCanceller() + a.restartCanceller = nil +} + +// restart restarts the application +func (a *Application) restart(cfg map[string]interface{}) { + a.appLock.Lock() + defer a.appLock.Unlock() + + // kill the process + if a.state.ProcessInfo != nil { + _ = a.state.ProcessInfo.Process.Kill() + a.state.ProcessInfo = nil + } + ctx := a.startContext + tag := a.tag + + err := a.start(ctx, tag, cfg) + if err != nil { + a.setState(state.Crashed, fmt.Sprintf("failed to restart: %s", err), nil) } } diff --git a/x-pack/elastic-agent/pkg/core/process/config.go b/x-pack/elastic-agent/pkg/core/process/config.go index 72e8e466720..4d12fc60f04 100644 --- a/x-pack/elastic-agent/pkg/core/process/config.go +++ b/x-pack/elastic-agent/pkg/core/process/config.go @@ -8,8 +8,9 @@ import "time" // Config for fine tuning new process type Config struct { - SpawnTimeout time.Duration `yaml:"spawn_timeout" config:"spawn_timeout"` - StopTimeout time.Duration `yaml:"stop_timeout" config:"stop_timeout"` + SpawnTimeout time.Duration `yaml:"spawn_timeout" config:"spawn_timeout"` + StopTimeout time.Duration `yaml:"stop_timeout" config:"stop_timeout"` + FailureTimeout time.Duration `yaml:"failure_timeout" config:"failure_timeout"` // TODO: cgroups and namespaces } @@ -17,7 +18,8 @@ type Config struct { // DefaultConfig creates a config with pre-set default values. func DefaultConfig() *Config { return &Config{ - SpawnTimeout: 30 * time.Second, - StopTimeout: 30 * time.Second, + SpawnTimeout: 30 * time.Second, + StopTimeout: 30 * time.Second, + FailureTimeout: 10 * time.Second, } } From 5a5fa15cf0355c61bc685e7baef4ffe106c93085 Mon Sep 17 00:00:00 2001 From: Blake Rouse Date: Wed, 28 Apr 2021 12:31:56 -0400 Subject: [PATCH 2/4] [Elastic Agent] Don't log when upgrade capability doesn't apply (#25386) * Fix #25302. * Add changelog entry. --- x-pack/elastic-agent/CHANGELOG.next.asciidoc | 1 + x-pack/elastic-agent/pkg/capabilities/upgrade.go | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/elastic-agent/CHANGELOG.next.asciidoc b/x-pack/elastic-agent/CHANGELOG.next.asciidoc index 2ff301db695..a84def8f3ec 100644 --- a/x-pack/elastic-agent/CHANGELOG.next.asciidoc +++ b/x-pack/elastic-agent/CHANGELOG.next.asciidoc @@ -59,6 +59,7 @@ - Passing in policy id to container command works {pull}25352[25352] - Reduce log level for listener cleanup to debug {pull}25274[25274] - Delay the restart of application when a status report of failure is given {pull}25339[25339] +- Don't log when upgrade capability doesn't apply {pull}25386[25386] ==== New features diff --git a/x-pack/elastic-agent/pkg/capabilities/upgrade.go b/x-pack/elastic-agent/pkg/capabilities/upgrade.go index a98e56c149f..1e189d7740f 100644 --- a/x-pack/elastic-agent/pkg/capabilities/upgrade.go +++ b/x-pack/elastic-agent/pkg/capabilities/upgrade.go @@ -147,7 +147,6 @@ type multiUpgradeCapability struct { func (c *multiUpgradeCapability) Apply(in interface{}) (interface{}, error) { upgradeMap := upgradeObject(in) if upgradeMap == nil { - c.log.Warnf("expecting map config object but got nil for capability 'multi-upgrade'") // not an upgrade we don't alter origin return in, nil } From 583b9772adc7c6af0cd632227434f309a5cab4d1 Mon Sep 17 00:00:00 2001 From: DeDe Morton Date: Wed, 28 Apr 2021 10:04:30 -0700 Subject: [PATCH 3/4] Fix includes to fix broken doc build (#25401) --- filebeat/docs/modules/zookeeper.asciidoc | 17 +++++------------ .../module/zookeeper/_meta/docs.asciidoc | 17 +++++------------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/filebeat/docs/modules/zookeeper.asciidoc b/filebeat/docs/modules/zookeeper.asciidoc index ea6a3df0a24..0449d7b3555 100644 --- a/filebeat/docs/modules/zookeeper.asciidoc +++ b/filebeat/docs/modules/zookeeper.asciidoc @@ -21,6 +21,11 @@ The +{modulename}+ module was tested with logs from versions 3.7.0. include::../include/configuring-intro.asciidoc[] +//set the fileset name used in the included example +:fileset_ex: audit + +include::../include/config-option-intro.asciidoc[] + The following example shows how to set paths in the +modules.d/{modulename}.yml+ file to override the default paths for logs: @@ -54,11 +59,6 @@ Audit logging is available since Zookeeper 3.6.0, but it is disabled by default. audit.enable=true ---------------------- -//set the fileset name used in the included example -:fileset_ex: audit - -include::../include/config-option-intro.asciidoc[] - [float] ==== `audit` fileset settings @@ -66,13 +66,6 @@ include::../include/var-paths.asciidoc[] include::../include/timezone-support.asciidoc[] -:fileset_ex!: - -//set the fileset name used in the included example -:fileset_ex: log - -include::../include/config-option-intro.asciidoc[] - [float] ==== `log` fileset settings diff --git a/x-pack/filebeat/module/zookeeper/_meta/docs.asciidoc b/x-pack/filebeat/module/zookeeper/_meta/docs.asciidoc index 2e680ce7b2d..e01ae4d32a9 100644 --- a/x-pack/filebeat/module/zookeeper/_meta/docs.asciidoc +++ b/x-pack/filebeat/module/zookeeper/_meta/docs.asciidoc @@ -16,6 +16,11 @@ The +{modulename}+ module was tested with logs from versions 3.7.0. include::../include/configuring-intro.asciidoc[] +//set the fileset name used in the included example +:fileset_ex: audit + +include::../include/config-option-intro.asciidoc[] + The following example shows how to set paths in the +modules.d/{modulename}.yml+ file to override the default paths for logs: @@ -49,11 +54,6 @@ Audit logging is available since Zookeeper 3.6.0, but it is disabled by default. audit.enable=true ---------------------- -//set the fileset name used in the included example -:fileset_ex: audit - -include::../include/config-option-intro.asciidoc[] - [float] ==== `audit` fileset settings @@ -61,13 +61,6 @@ include::../include/var-paths.asciidoc[] include::../include/timezone-support.asciidoc[] -:fileset_ex!: - -//set the fileset name used in the included example -:fileset_ex: log - -include::../include/config-option-intro.asciidoc[] - [float] ==== `log` fileset settings From 7916ad4d0f7b6f28d84c889546ae28d9b4ce6d84 Mon Sep 17 00:00:00 2001 From: Nicolas Ruflin Date: Wed, 28 Apr 2021 19:08:35 +0200 Subject: [PATCH 4/4] [elastic-agent] Add hint to version command for binary (#25380) By default when running `elastic-agent version` it is expected that the daemon is running. If it is not running, an error is shown. Then the flag --binary-only should be used. A hint for this is added here to the error message for users. --- x-pack/elastic-agent/pkg/basecmd/version/cmd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/elastic-agent/pkg/basecmd/version/cmd.go b/x-pack/elastic-agent/pkg/basecmd/version/cmd.go index 15c3caf27a8..80bc6c7592d 100644 --- a/x-pack/elastic-agent/pkg/basecmd/version/cmd.go +++ b/x-pack/elastic-agent/pkg/basecmd/version/cmd.go @@ -64,7 +64,7 @@ func NewCommandWithArgs(streams *cli.IOStreams) *cobra.Command { binaryOnly, _ := cmd.Flags().GetBool("binary-only") if !binaryOnly { if d, err := queryDaemon(); err != nil { - returnErr = fmt.Errorf("failed to communicate with running daemon: %w", err) + returnErr = fmt.Errorf("could not get version. failed to communicate with running daemon: %w\nUse --binary-only flag to skip trying to retrieve version from running daemon", err) } else { daemon = d if isMismatch(&binary, daemon) {