From fc4a2a36f231d254c8b500655ef10d8e233ba309 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 5 Jun 2020 17:45:00 +0200 Subject: [PATCH] Windows: fix service termination (#18916) Update the Windows service handling logic so that the service doesn't transition to the STOPPED state until the beater is terminated. Before this patch, a Beats service would report itself as STOPPED as soon as it received the stop request. This caused problems during service restarts, as the new service would start while the old one was still cleaning up. Fixes #18914 (cherry picked from commit f3ab7c78a92ac0e11feb537c92a5587ac465d1c1) --- CHANGELOG.next.asciidoc | 1 + libbeat/cmd/instance/beat.go | 6 ++++++ libbeat/service/service.go | 5 +++++ libbeat/service/service_unix.go | 3 +++ libbeat/service/service_windows.go | 27 ++++++++++++++++++++++++--- 5 files changed, 39 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index ed38345fadb..4e03e9c7394 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -117,6 +117,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - [Autodiscover] Check if runner is already running before starting again. {pull}18564[18564] - Fix `keystore add` hanging under Windows. {issue}18649[18649] {pull}18654[18654] - Fix regression in `add_kubernetes_metadata`, so configured `indexers` and `matchers` are used if defaults are not disabled. {issue}18481[18481] {pull}18818[18818] +- Fixed a service restart failure under Windows. {issue}18914[18914] {pull}18916[18916] *Auditbeat* diff --git a/libbeat/cmd/instance/beat.go b/libbeat/cmd/instance/beat.go index ca346085866..c9c53d6936b 100644 --- a/libbeat/cmd/instance/beat.go +++ b/libbeat/cmd/instance/beat.go @@ -390,6 +390,12 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error { return err } + // Windows: Mark service as stopped.
+ // After this is run, a Beat service is considered by the OS to be stopped + // and another instance of the process can be started. + // This must be the first deferred cleanup task (last to execute). + defer svc.NotifyTermination() + // Try to acquire exclusive lock on data path to prevent another beat instance // sharing same data path. bl := newLocker(b) diff --git a/libbeat/service/service.go b/libbeat/service/service.go index ec6e0fca672..4c56cfc28a2 100644 --- a/libbeat/service/service.go +++ b/libbeat/service/service.go @@ -67,6 +67,11 @@ func HandleSignals(stopFunction func(), cancel context.CancelFunc) { }) } +// NotifyTermination tells the OS that the service is stopped. +func NotifyTermination() { + notifyWindowsServiceStopped() +} + // cmdline flags var memprofile, cpuprofile, httpprof *string var cpuOut *os.File diff --git a/libbeat/service/service_unix.go b/libbeat/service/service_unix.go index 7c6bfb4d08a..7d20b04620e 100644 --- a/libbeat/service/service_unix.go +++ b/libbeat/service/service_unix.go @@ -22,3 +22,6 @@ package service // ProcessWindowsControlEvents is not used on non-windows platforms. func ProcessWindowsControlEvents(stopCallback func()) { } + +func notifyWindowsServiceStopped() { +} diff --git a/libbeat/service/service_windows.go b/libbeat/service/service_windows.go index 649bf85cfa8..a81f4fb5a0f 100644 --- a/libbeat/service/service_windows.go +++ b/libbeat/service/service_windows.go @@ -28,7 +28,15 @@ import ( "github.com/elastic/beats/v7/libbeat/logp" ) -type beatService struct{} +type beatService struct { + stopCallback func() + done chan struct{} +} + +var serviceInstance = &beatService{ + stopCallback: nil, + done: make(chan struct{}, 0), +} // Execute runs the beat service with the arguments and manages changes that // occur in the environment or runtime that may affect the beat. 
@@ -52,9 +60,22 @@ loop: } } changes <- svc.Status{State: svc.StopPending} + m.stopCallback() + // Block until notifyWindowsServiceStopped below is called. This is required + // as the windows/svc package will transition the service to STOPPED state + // once this function returns. + <-m.done return } +func (m *beatService) stop() { + close(m.done) +} + +func notifyWindowsServiceStopped() { + serviceInstance.stop() +} + // couldNotConnect is the errno for ERROR_FAILED_SERVICE_CONTROLLER_CONNECT. const couldNotConnect syscall.Errno = 1063 @@ -76,10 +97,10 @@ func ProcessWindowsControlEvents(stopCallback func()) { run = debug.Run } - err = run(os.Args[0], &beatService{}) + serviceInstance.stopCallback = stopCallback + err = run(os.Args[0], serviceInstance) if err == nil { - stopCallback() return }