From bc44bcb4242434f6fe7050a989442378b120ea84 Mon Sep 17 00:00:00 2001 From: Zack Olson Date: Tue, 12 Nov 2024 11:49:23 -0500 Subject: [PATCH] upgrade watchdog service to scheduled task (#1951) --- cmd/launcher/launcher.go | 2 +- cmd/launcher/main.go | 2 +- docs/architecture/launcher_watchdog.md | 31 +- .../power_event_watcher_windows.go | 12 +- ee/uninstall/uninstall_windows.go | 4 +- ee/watchdog/controller_other.go | 2 +- ee/watchdog/controller_windows.go | 550 ++++++++++++------ ee/watchdog/controller_windows_test.go | 5 +- ee/watchdog/watchdog_service_windows.go | 290 --------- ...ervice_other.go => watchdog_task_other.go} | 2 +- ee/watchdog/watchdog_task_windows.go | 140 +++++ pkg/launcher/paths.go | 32 + pkg/launcher/pkg_utils_windows.go | 13 + pkg/launcher/pkg_utils_windows_test.go | 44 ++ 14 files changed, 633 insertions(+), 496 deletions(-) delete mode 100644 ee/watchdog/watchdog_service_windows.go rename ee/watchdog/{watchdog_service_other.go => watchdog_task_other.go} (71%) create mode 100644 ee/watchdog/watchdog_task_windows.go diff --git a/cmd/launcher/launcher.go b/cmd/launcher/launcher.go index ca2fc2154..e33d79a86 100644 --- a/cmd/launcher/launcher.go +++ b/cmd/launcher/launcher.go @@ -302,7 +302,7 @@ func runLauncher(ctx context.Context, cancel func(), multiSlogger, systemMultiSl go checkpointer.Once(ctx) runGroup.Add("logcheckpoint", checkpointer.Run, checkpointer.Interrupt) - watchdogController, err := watchdog.NewController(ctx, k) + watchdogController, err := watchdog.NewController(ctx, k, opts.ConfigFilePath) if err != nil { // log any issues here but move on, watchdog is not critical path slogger.Log(ctx, slog.LevelError, "could not init watchdog controller", diff --git a/cmd/launcher/main.go b/cmd/launcher/main.go index dd8342c7b..235ac6fd5 100644 --- a/cmd/launcher/main.go +++ b/cmd/launcher/main.go @@ -191,7 +191,7 @@ func runSubcommands(systemMultiSlogger *multislogger.MultiSlogger) error { case "secure-enclave": run = runSecureEnclave 
case "watchdog": // note: this is currently only implemented for windows - run = watchdog.RunWatchdogService + run = watchdog.RunWatchdogTask default: return fmt.Errorf("unknown subcommand %s", os.Args[1]) } diff --git a/docs/architecture/launcher_watchdog.md b/docs/architecture/launcher_watchdog.md index e480316f9..bd0a91cc9 100644 --- a/docs/architecture/launcher_watchdog.md +++ b/docs/architecture/launcher_watchdog.md @@ -3,31 +3,30 @@ Note that for the initial implementation, this service is windows only. It is intentionally designed to give room for alternate OS implementations if needed in the future. Most of the relevant code can be found in [ee/watchdog](../../ee/watchdog/) -Here is a basic sequence diagram displaying the enable path for the windows watchdog service. The `launcher_watchdog_enabled` control flag will trigger the initial configuration and installation, and removal of the flag will trigger removal of the service. +Here is a basic sequence diagram displaying the enable path for the windows watchdog task. The `launcher_watchdog_enabled` control flag will trigger the initial configuration and installation, and removal of the flag will trigger removal of the task. + +You can alternatively install or remove the task for testing/troubleshooting using the `--install-task` and `--remove-task` options for the watchdog subcommand. Note this is intended for developer convenience or emergency usage - the `launcher_watchdog_enabled` flag sent from cloud will eventually override any manual actions. ```mermaid sequenceDiagram participant LauncherKolideK2Svc Note right of LauncherKolideK2Svc: ./launcher.exe svc ... - create participant WindowsServiceManager - LauncherKolideK2Svc->>WindowsServiceManager: if launcher_watchdog_enabled - create participant LauncherKolideWatchdogSvc - WindowsServiceManager->>LauncherKolideWatchdogSvc: have we installed the watchdog? 
- Note left of LauncherKolideWatchdogSvc: ./launcher.exe watchdog + participant WindowsSchedulerService + participant LauncherKolideK2WatchdogTask + Note right of LauncherKolideK2WatchdogTask: ./launcher.exe watchdog - alt yes the service already exists - LauncherKolideK2Svc->>LauncherKolideWatchdogSvc: Restart to ensure latest - else no the service does not exist - LauncherKolideK2Svc->>WindowsServiceManager: 1 - create, configure, etc - LauncherKolideK2Svc->>LauncherKolideWatchdogSvc: 2 - Start - activate LauncherKolideWatchdogSvc + alt launcher_watchdog_enabled + LauncherKolideK2Svc->>WindowsSchedulerService: create, configure, and install watchdog task + activate LauncherKolideK2WatchdogTask + else flag is not enabled + LauncherKolideK2Svc->>WindowsSchedulerService: remove watchdog task end - loop every n minutes - LauncherKolideWatchdogSvc->>WindowsServiceManager: Query LauncherKolideK2Svc status - LauncherKolideWatchdogSvc->>LauncherKolideK2Svc: Start if Stopped + loop every 30 minutes, or 1 minute after wake event + WindowsSchedulerService->>LauncherKolideK2WatchdogTask: triggers scheduled task + LauncherKolideK2WatchdogTask->>LauncherKolideK2Svc: performs healthcheck, restarts if stopped end ``` The restart functionality is currently limited to detecting a stopped state, but the idea here is to lay out the foundation for more advanced healthchecking. -The watchdog service itself runs as a separate invocation of launcher, writing all logs to sqlite. The main invocation of launcher runs a watchdog controller, which responds to the `launcher_watchdog_enabled` flag, and publishes all sqlite logs to debug.json. \ No newline at end of file +The watchdog task itself runs as a launcher subcommand, performing any required checks/actions and writing all logs to sqlite before exiting. The main invocation of launcher runs a watchdog controller, which responds to the `launcher_watchdog_enabled` flag, and publishes all sqlite logs to debug.json. 
\ No newline at end of file diff --git a/ee/powereventwatcher/power_event_watcher_windows.go b/ee/powereventwatcher/power_event_watcher_windows.go index a13699b64..a977e9a65 100644 --- a/ee/powereventwatcher/power_event_watcher_windows.go +++ b/ee/powereventwatcher/power_event_watcher_windows.go @@ -66,9 +66,9 @@ type ( const ( eventIdEnteringModernStandby = 506 - eventIdExitingModernStandby = 507 + EventIdExitingModernStandby = 507 eventIdEnteringSleep = 42 - eventIdResumedFromSleep = 107 + EventIdResumedFromSleep = 107 operationSuccessfulMsg = "The operation completed successfully." ) @@ -108,7 +108,7 @@ func (ims *InMemorySleepStateUpdater) OnPowerEvent(eventID int) error { switch eventID { case eventIdEnteringModernStandby, eventIdEnteringSleep: ims.inModernStandby = true - case eventIdExitingModernStandby, eventIdResumedFromSleep: + case EventIdExitingModernStandby, EventIdResumedFromSleep: ims.inModernStandby = false default: ims.slogger.Log(context.TODO(), slog.LevelWarn, @@ -134,7 +134,7 @@ func (ks *knapsackSleepStateUpdater) OnPowerEvent(eventID int) error { "err", err, ) } - case eventIdExitingModernStandby, eventIdResumedFromSleep: + case EventIdExitingModernStandby, EventIdResumedFromSleep: ks.slogger.Log(context.TODO(), slog.LevelDebug, "system is waking", "event_id", eventID, @@ -187,9 +187,9 @@ func New(ctx context.Context, slogger *slog.Logger, pes powerEventSubscriber) (* queryStr := fmt.Sprintf("*[System[Provider[@Name='Microsoft-Windows-Kernel-Power'] and (EventID=%d or EventID=%d or EventID=%d or EventID=%d)]]", eventIdEnteringModernStandby, - eventIdExitingModernStandby, + EventIdExitingModernStandby, eventIdEnteringSleep, - eventIdResumedFromSleep, + EventIdResumedFromSleep, ) query, err := syscall.UTF16PtrFromString(queryStr) if err != nil { diff --git a/ee/uninstall/uninstall_windows.go b/ee/uninstall/uninstall_windows.go index d1bb33a79..2ba43c745 100644 --- a/ee/uninstall/uninstall_windows.go +++ b/ee/uninstall/uninstall_windows.go @@ 
-35,8 +35,8 @@ func disableAutoStart(ctx context.Context, k types.Knapsack) error { } // attempt to remove watchdog service in case it is installed to prevent startups later on - if err := watchdog.RemoveService(svcMgr); err != nil { - return fmt.Errorf("removing watchdog service, error may be expected if not enabled: %w", err) + if err := watchdog.RemoveWatchdogTask(k.Identifier()); err != nil { + return fmt.Errorf("removing watchdog task, error may be expected if not installed: %w", err) } return nil diff --git a/ee/watchdog/controller_other.go b/ee/watchdog/controller_other.go index 0b902e585..b221cc22d 100644 --- a/ee/watchdog/controller_other.go +++ b/ee/watchdog/controller_other.go @@ -12,7 +12,7 @@ import ( type WatchdogController struct{} -func NewController(_ context.Context, _ types.Knapsack) (*WatchdogController, error) { +func NewController(_ context.Context, _ types.Knapsack, _ string) (*WatchdogController, error) { return nil, nil } diff --git a/ee/watchdog/controller_windows.go b/ee/watchdog/controller_windows.go index 3cd8dca63..f318ef393 100644 --- a/ee/watchdog/controller_windows.go +++ b/ee/watchdog/controller_windows.go @@ -7,45 +7,42 @@ import ( "context" "encoding/json" "fmt" + "log" "log/slog" - "os" - "path/filepath" "slices" + "strings" "time" + "github.com/go-ole/go-ole" + "github.com/go-ole/go-ole/oleutil" "github.com/kolide/launcher/ee/agent/flags/keys" agentsqlite "github.com/kolide/launcher/ee/agent/storage/sqlite" "github.com/kolide/launcher/ee/agent/types" - "github.com/kolide/launcher/pkg/backoff" + "github.com/kolide/launcher/ee/powereventwatcher" "github.com/kolide/launcher/pkg/launcher" "golang.org/x/sys/windows" - "golang.org/x/sys/windows/svc" - "golang.org/x/sys/windows/svc/mgr" ) const ( - launcherWatchdogServiceName string = `LauncherKolideWatchdogSvc` - launcherServiceName string = `LauncherKolideK2Svc` - - serviceDoesNotExistError string = "The specified service does not exist as an installed service." 
- - serviceResetPeriodSeconds uint32 = 3 * 60 * 60 // 3 hours in seconds + taskDateFormat string = "2006-01-02T15:04:05" + watchdogTaskType string = "watchdog" ) // WatchdogController is responsible for: -// 1. adding/enabling and disabling/removing the watchdog service according to the agent flag -// 2. publishing any watchdog_logs written out by the watchdog service +// 1. adding/enabling and disabling/removing the watchdog task according to the agent flag +// 2. publishing any watchdog_logs written out by the watchdog task // // This controller is intended for use by the main launcher service invocation type WatchdogController struct { - slogger *slog.Logger - knapsack types.Knapsack - interrupt chan struct{} - interrupted bool - logPublisher types.LogStore + slogger *slog.Logger + knapsack types.Knapsack + interrupt chan struct{} + interrupted bool + logPublisher types.LogStore + configFilePath string } -func NewController(ctx context.Context, k types.Knapsack) (*WatchdogController, error) { +func NewController(ctx context.Context, k types.Knapsack, configFilePath string) (*WatchdogController, error) { // set up the log publisher, if watchdog is enabled we will need to pull those logs from sqlite periodically logPublisher, err := agentsqlite.OpenRW(ctx, k.RootDirectory(), agentsqlite.WatchdogLogStore) if err != nil { @@ -53,10 +50,11 @@ func NewController(ctx context.Context, k types.Knapsack) (*WatchdogController, } return &WatchdogController{ - slogger: k.Slogger().With("component", "watchdog_controller"), - knapsack: k, - interrupt: make(chan struct{}, 1), - logPublisher: logPublisher, + slogger: k.Slogger().With("component", "watchdog_controller"), + knapsack: k, + interrupt: make(chan struct{}, 1), + logPublisher: logPublisher, + configFilePath: configFilePath, }, nil } @@ -71,7 +69,7 @@ func (wc *WatchdogController) FlagsChanged(flagKeys ...keys.FlagKey) { // use all of the existing log publication and cleanup logic while maintaining a single writer func 
(wc *WatchdogController) Run() error { ctx := context.TODO() - ticker := time.NewTicker(time.Minute * 5) + ticker := time.NewTicker(time.Minute * 30) defer ticker.Stop() for { @@ -90,15 +88,26 @@ func (wc *WatchdogController) Run() error { } func (wc *WatchdogController) publishLogs(ctx context.Context) { - // note that there is a small window here where there could be pending logs before watchdog is disabled - + // we don't install watchdog for non-prod deployments, so we should also skip log publication + if !launcher.IsKolideHostedServerURL(wc.knapsack.KolideServerURL()) { + return + } + + // note that there is a small window here where there could be pending logs before the watchdog task is removed - // there is no harm in leaving them and we could recover these with the original timestamps if we ever needed. // to avoid endlessly re-processing empty logs while we are disabled, we accept this possibility and exit early here - if !wc.knapsack.LauncherWatchdogEnabled() { + watchdogTaskIsInstalled, err := watchdogTaskExists(wc.knapsack.Identifier()) + if err != nil { + wc.slogger.Log(ctx, slog.LevelWarn, + "encountered error checking if watchdog task exists", + "err", err, + ) + return } - // we don't install watchdog for non-prod deployments, so we should also skip log publication - if !launcher.IsKolideHostedServerURL(wc.knapsack.KolideServerURL()) { + // no need to parse logs if task is not installed + if !watchdogTaskIsInstalled { return } @@ -180,224 +189,413 @@ func (wc *WatchdogController) ServiceEnabledChanged(enabled bool) { return } - var serviceManager *mgr.Mgr - var err error + if !enabled { + if err := RemoveWatchdogTask(wc.knapsack.Identifier()); err != nil { + wc.slogger.Log(ctx, slog.LevelWarn, + "encountered error removing watchdog task", + "err", err, + ) - if err := backoff.WaitFor(func() error { - serviceManager, err = mgr.Connect() - if err != nil { - return fmt.Errorf("err connecting to service control manager: %w", err) + return } - return 
nil - }, 5*time.Second, 500*time.Millisecond); err != nil { + wc.slogger.Log(ctx, slog.LevelInfo, "removed watchdog task") + + return + } + + // we're enabling the watchdog task- we can safely always reinstall our latest version here + if err := installWatchdogTask(wc.knapsack.Identifier(), wc.configFilePath); err != nil { wc.slogger.Log(ctx, slog.LevelError, - "timed out connecting to service control manager", + "encountered error installing watchdog task", "err", err, ) + } - return + wc.slogger.Log(ctx, slog.LevelInfo, "completed watchdog scheduled task installation") +} + +// installWatchdogTask registers our watchdog subcommand as a scheduled task. +// see inline comments for details on various settings, but here is a general overview: +// Triggers: +// - 1 minute after any wake event +// - every 30 minutes as a routine check +// Action: +// - runs launcher.exe watchdog -config with a 1 minute timeout +func installWatchdogTask(identifier, configFilePath string) error { + if strings.TrimSpace(identifier) == "" { + identifier = launcher.DefaultLauncherIdentifier } - defer serviceManager.Disconnect() + taskName := launcher.TaskName(identifier, watchdogTaskType) + // init COM - we discard the error returned by CoInitialize because it + // harmlessly returns S_FALSE if we call it more than once + ole.CoInitialize(0) + defer ole.CoUninitialize() - if !enabled { - err := RemoveService(serviceManager) - if err != nil { - if err.Error() == serviceDoesNotExistError { - wc.slogger.Log(ctx, slog.LevelDebug, "watchdog service was not previously installed") - return - } + // create our scheduler object + schedService, err := oleutil.CreateObject("Schedule.Service") + if err != nil { + return fmt.Errorf("creating schedule service object: %w", err) + } + defer schedService.Release() - wc.slogger.Log(ctx, slog.LevelWarn, - "encountered error removing watchdog service", - "err", err, - ) + // get service interface handle + scheduler, err := 
schedService.QueryInterface(ole.IID_IDispatch) + if err != nil { + return err + } + defer scheduler.Release() - return - } + // connect to the scheduler handle on the local machine + _, err = oleutil.CallMethod(scheduler, "Connect") + if err != nil { + return fmt.Errorf("failed to connect to Task Scheduler: %w", err) + } - wc.slogger.Log(ctx, slog.LevelInfo, "removed watchdog service") + // grab the root task folder + rootFolderVar, err := oleutil.CallMethod(scheduler, "GetFolder", `\`) + if err != nil { + return fmt.Errorf("failed to get root folder: %w", err) + } - return + rootFolder := rootFolderVar.ToIDispatch() + defer rootFolder.Release() + + // begin definition for a new task + taskDefinitionTemplate, err := oleutil.CallMethod(scheduler, "NewTask", 0) + if err != nil { + return fmt.Errorf("failed to create new task definition: %w", err) } - // we're enabling the watchdog - first check if we've already installed the service - // there are three potential paths here: - // 1. service did not previously exist, proceed with clean installation - existingService, err := serviceManager.OpenService(launcherWatchdogServiceName) - if err != nil && err.Error() == serviceDoesNotExistError { - if err = wc.installService(serviceManager); err != nil { - wc.slogger.Log(ctx, slog.LevelError, - "encountered error installing watchdog service", - "err", err, - ) - } + taskDefinition := taskDefinitionTemplate.ToIDispatch() + defer taskDefinition.Release() - return + installationDate := time.Now().Format(taskDateFormat) + + // get the task registration info props + regInfoProp, err := oleutil.GetProperty(taskDefinition, "RegistrationInfo") + if err != nil { + return fmt.Errorf("getting registration info property: %w", err) + } + + regInfo := regInfoProp.ToIDispatch() + defer regInfo.Release() + + if _, err = oleutil.PutProperty(regInfo, "Description", "Kolide agent restarter"); err != nil { + return fmt.Errorf("setting reginfo description: %w", err) } - // 2. 
we are unable to check the current status of the service, - // this is the least likely option and there's nothing we can do here so log and return + if _, err := oleutil.PutProperty(regInfo, "Author", "Kolide, Inc."); err != nil { + return fmt.Errorf("setting reginfo author: %w", err) + } + + if _, err := oleutil.PutProperty(regInfo, "Date", installationDate); err != nil { + return fmt.Errorf("setting reginfo date: %w", err) + } + + principalProp, err := oleutil.GetProperty(taskDefinition, "Principal") if err != nil { - wc.slogger.Log(ctx, slog.LevelWarn, - "encountered error checking for watchdog service, unable to proceed with enabling", - "err", err, - ) + return fmt.Errorf("getting principal property: %w", err) + } - return + principal := principalProp.ToIDispatch() + defer principal.Release() + + // see all principal settings here https://learn.microsoft.com/en-us/windows/win32/api/taskschd/nn-taskschd-iprincipal + // 1=TASK_RUNLEVEL_HIGHEST + if _, err := oleutil.PutProperty(principal, "RunLevel", uint(1)); err != nil { + return fmt.Errorf("setting run level: %w", err) } - // 3. The watchdog service already exists on this device. 
Here we just restart it to ensure it is - // running on the latest launcher code - defer existingService.Close() - if err = wc.restartService(existingService); err != nil { - wc.slogger.Log(ctx, slog.LevelError, - "failure attempting to restart watchdog service", - "err", err, - ) + // get the root task settings property + settingsProp, err := oleutil.GetProperty(taskDefinition, "Settings") + if err != nil { + return fmt.Errorf("getting settings property: %w", err) } -} -func (wc *WatchdogController) getExecutablePath() (string, error) { - defaultBinDir := launcher.DefaultPath(launcher.BinDirectory) - defaultLauncherLocation := filepath.Join(defaultBinDir, "launcher.exe") - // do some basic sanity checking to prevent installation from a bad path - _, err := os.Stat(defaultLauncherLocation) + settings := settingsProp.ToIDispatch() + defer settings.Release() + + // see all available task settings here https://learn.microsoft.com/en-us/windows/win32/api/taskschd/nn-taskschd-itasksettings + // task will be enabled on creation + if _, err = oleutil.PutProperty(settings, "Enabled", true); err != nil { + return fmt.Errorf("setting enabled flag: %w", err) + } + + // start the task at any time after its scheduled time has passed + if _, err = oleutil.PutProperty(settings, "StartWhenAvailable", true); err != nil { + return fmt.Errorf("setting StartWhenAvailable flag: %w", err) + } + + // task will be started even if the computer is running on batteries + if _, err = oleutil.PutProperty(settings, "DisallowStartIfOnBatteries", false); err != nil { + return fmt.Errorf("setting DisallowStartIfOnBatteries flag: %w", err) + } + + // task will be continue even if the computer changes power source to battery + if _, err = oleutil.PutProperty(settings, "StopIfGoingOnBatteries", false); err != nil { + return fmt.Errorf("setting StopIfGoingOnBatteries flag: %w", err) + } + + // see compatibility options here 
https://learn.microsoft.com/en-us/windows/win32/api/taskschd/ne-taskschd-task_compatibility + // 2=TASK_COMPATIBILITY_V2 - recommended unless you need to support Windows XP, Windows Server 2003, or Windows 2000 + if _, err = oleutil.PutProperty(settings, "Compatibility", uint(2)); err != nil { + return fmt.Errorf("setting Compatibility flag: %w", err) + } + + idleSettingsProp, err := oleutil.GetProperty(settings, "IdleSettings") if err != nil { - return "", err + return fmt.Errorf("getting idle settings property: %w", err) } - return defaultLauncherLocation, nil -} + idleSettings := idleSettingsProp.ToIDispatch() + defer idleSettings.Release() -func (wc *WatchdogController) installService(serviceManager *mgr.Mgr) error { - ctx := context.TODO() - installedExePath, err := wc.getExecutablePath() + // see idle settings here https://learn.microsoft.com/en-us/windows/win32/taskschd/taskschedulerschema-idlesettings-settingstype-element + // do not stop the task if an idle condition ends before the task is completed + if _, err = oleutil.PutProperty(idleSettings, "StopOnIdleEnd", false); err != nil { + return fmt.Errorf("setting StopOnIdleEnd idlesetting: %w", err) + } + + // begin trigger definitions + triggersProp, err := oleutil.GetProperty(taskDefinition, "Triggers") if err != nil { - return fmt.Errorf("determining watchdog executable path: %w", err) + return fmt.Errorf("getting triggers property: %w", err) } - svcMgrConf := mgr.Config{ - DisplayName: launcherWatchdogServiceName, - Description: "The Kolide Launcher Watchdog Service", - StartType: mgr.StartAutomatic, - ErrorControl: mgr.ErrorNormal, - // no reason to rush start for this service, we should wait until after - // the main launcher service has attempted to start anyway - DelayedAutoStart: true, + triggers := triggersProp.ToIDispatch() + defer triggers.Release() + // see trigger types here https://learn.microsoft.com/en-us/windows/win32/api/taskschd/ne-taskschd-task_trigger_type2 + createTriggerResp, err := 
oleutil.CallMethod(triggers, "Create", uint(0)) // 0=TASK_TRIGGER_EVENT + if err != nil { + log.Fatalf("encountered error creating trigger: %s", err.Error()) } - serviceArgs := []string{"watchdog"} - // add any original service arguments from the main launcher service invocation (currently running) - // this is likely just a pointer to the launcher.flags file but we want to ensure that the watchdog service - // has insight into the same options for early service configuration, logging, etc. - serviceArgs = append(serviceArgs, os.Args[2:]...) + trigger := createTriggerResp.ToIDispatch() + defer trigger.Release() - restartService, err := serviceManager.CreateService( - launcherWatchdogServiceName, - installedExePath, - svcMgrConf, - serviceArgs..., + if _, err = oleutil.PutProperty(trigger, "ExecutionTimeLimit", "PT1M"); err != nil { + return fmt.Errorf("setting execution time limit property: %w", err) + } + + // found the guid here https://github.com/capnspacehook/taskmaster/blob/1629df7c85e96aab410af7f1747ba264d3276505/fill.go#L168 + eventTrigger, err := trigger.QueryInterface(ole.NewGUID("{d45b0167-9653-4eef-b94f-0732ca7af251}")) + if err != nil { + return fmt.Errorf("getting trigger interface: %w", err) + } + defer eventTrigger.Release() + + eventSubscriptionTemplate := ` +<QueryList> + <Query Id="0" Path="System"> + <Select Path="System">*[System[Provider[@Name='Microsoft-Windows-Kernel-Power'] and (EventID=%d or EventID=%d)]]</Select> + <Select Path="System">*[System[Provider[@Name='Microsoft-Windows-Power-Troubleshooter'] and EventID=%d]]</Select> + </Query> +</QueryList> +` + eventSubscription := fmt.Sprintf(eventSubscriptionTemplate, + powereventwatcher.EventIdExitingModernStandby, + powereventwatcher.EventIdResumedFromSleep, + 1, // Microsoft-Windows-Power-Troubleshooter event ID 1 is "resumed from low power state" ) - if err != nil { // no point moving forward if we can't create the service - return err + if _, err = oleutil.PutProperty(eventTrigger, "Subscription", eventSubscription); err != nil { + return fmt.Errorf("setting subscription property: %w", err) } - defer restartService.Close() + // see details for how this string is created here: https://learn.microsoft.com/en-us/windows/win32/taskschd/eventtrigger-delay + // PT1M here means 1 minute + if _, 
err = oleutil.PutProperty(eventTrigger, "Delay", "PT1M"); err != nil { + return fmt.Errorf("setting event trigger delay: %w", err) + } - // set recovery actions - always restart after a 5 second delay - recoveryActions := []mgr.RecoveryAction{ - { - Type: mgr.ServiceRestart, - Delay: 5 * time.Second, - }, + // add another trigger, this one time based- repeat every 30 minutes + createTimeTriggerResp, err := oleutil.CallMethod(triggers, "Create", uint(1)) // 1=TASK_TRIGGER_TIME + if err != nil { + return fmt.Errorf("error creating time trigger object: %w", err) } - if err = restartService.SetRecoveryActions(recoveryActions, serviceResetPeriodSeconds); err != nil { - wc.slogger.Log(ctx, slog.LevelWarn, - "unable to set recovery actions for service installation, proceeding", - "err", err, - ) + timeTrigger := createTimeTriggerResp.ToIDispatch() + defer timeTrigger.Release() + + if _, err := oleutil.PutProperty(timeTrigger, "Enabled", true); err != nil { + return fmt.Errorf("setting time trigger enabled: %w", err) } - // set recovery actions on non crash failures - indicates that we want service manager - // to restart this service after terminating without a state of SERVICE_STOPPED, or whenever - // the exit code is not 0 (ERROR_SUCCESS) - if err = restartService.SetRecoveryActionsOnNonCrashFailures(true); err != nil { - wc.slogger.Log(ctx, slog.LevelWarn, - "unable to set RecoveryActionsOnNonCrashFailures flag, proceeding", - "err", err, - ) + // set the execution timeout, PT1M=1 minute + if _, err := oleutil.PutProperty(timeTrigger, "ExecutionTimeLimit", "PT1M"); err != nil { + return fmt.Errorf("setting time trigger execution time limit: %w", err) } - if err = restartService.Start(); err != nil { - wc.slogger.Log(ctx, slog.LevelWarn, - "unable to start launcher restart service", - "err", err, - ) + if _, err = oleutil.PutProperty(timeTrigger, "StartBoundary", installationDate); err != nil { + return fmt.Errorf("setting time trigger start boundary: %w", err) + } + + 
repetitionObj, err := oleutil.GetProperty(timeTrigger, "Repetition") + if err != nil { + return fmt.Errorf("getting time trigger repetition property: %w", err) + } + + repetition := repetitionObj.ToIDispatch() + defer repetition.Release() + + // set the repetition interval. PT30M=30 minutes + if _, err = oleutil.PutProperty(repetition, "Interval", "PT30M"); err != nil { + return fmt.Errorf("setting time trigger interval: %w", err) + } + + // begin creation of the task action + actionsProp, err := oleutil.GetProperty(taskDefinition, "Actions") + if err != nil { + return fmt.Errorf("getting actions property: %w", err) + } + + actions := actionsProp.ToIDispatch() + defer actions.Release() + + // see action types here https://learn.microsoft.com/en-us/windows/win32/api/taskschd/ne-taskschd-task_action_type + // 0=TASK_ACTION_EXEC + execActionTemplate, err := oleutil.CallMethod(actions, "Create", uint(0)) + if err != nil { + return fmt.Errorf("creating event action: %w", err) + } + + execAction := execActionTemplate.ToIDispatch() + defer execAction.Release() + + installedExePath, err := launcher.GetOriginalLauncherExecutablePath(identifier) + if err != nil { + return fmt.Errorf("determining watchdog executable path: %w", err) + } + + if _, err = oleutil.PutProperty(execAction, "Path", `"`+installedExePath+`"`); err != nil { + return fmt.Errorf("setting action path: %w", err) + } + + taskArgs := fmt.Sprintf(`watchdog -config "%s"`, configFilePath) + if _, err = oleutil.PutProperty(execAction, "Arguments", taskArgs); err != nil { + return fmt.Errorf("setting action arguments: %w", err) } - wc.slogger.Log(ctx, slog.LevelInfo, "completed watchdog service installation") + // now register the task! 
+ _, err = oleutil.CallMethod(rootFolder, "RegisterTaskDefinition", + taskName, // Task name + taskDefinition, // Task definition + uint(6), // Flags: 6=TASK_CREATE_OR_UPDATE see https://learn.microsoft.com/en-us/windows/win32/api/taskschd/ne-taskschd-task_creation + "SYSTEM", // User: run as system + nil, // password (nil for the current user, we expect this installed from SYSTEM) + uint(5), // 5=TASK_LOGON_SERVICE_ACCOUNT see https://learn.microsoft.com/en-us/windows/win32/api/taskschd/ne-taskschd-task_logon_type + nil, // SDDL (security descriptor definition language string, nil for our purposes here) + ) + + if err != nil { + return fmt.Errorf("registering task definition: %w", err) + } return nil } -// RemoveService utilizes the passed serviceManager to remove any existing watchdog service if it exists -func RemoveService(serviceManager *mgr.Mgr) error { - existingService, err := serviceManager.OpenService(launcherWatchdogServiceName) +// RemoveWatchdogTask will determine the task name based on the given identifier, and remove +// the task from the scheduler service. 
This is exported for use by our remote uninstallation paths +func RemoveWatchdogTask(identifier string) error { + if strings.TrimSpace(identifier) == "" { + identifier = launcher.DefaultLauncherIdentifier + } + + taskName := launcher.TaskName(identifier, watchdogTaskType) + // init COM - we discard the error returned by CoInitialize because it + // harmlessly returns S_FALSE if we call it more than once + ole.CoInitialize(0) + defer ole.CoUninitialize() + + // create our scheduler object + schedService, err := oleutil.CreateObject("Schedule.Service") + if err != nil { + return fmt.Errorf("creating schedule service object: %w", err) + } + defer schedService.Release() + + // get service interface handle + scheduler, err := schedService.QueryInterface(ole.IID_IDispatch) if err != nil { return err } + defer scheduler.Release() - defer existingService.Close() + // connect to the scheduler handle on the local machine + _, err = oleutil.CallMethod(scheduler, "Connect") + if err != nil { + return fmt.Errorf("failed to connect to Task Scheduler: %w", err) + } - // attempt to stop the service first, we don't care if this fails because we are going to - // remove the service next anyway (the removal happens faster if stopped first, but will - // happen eventually regardless) - existingService.Control(svc.Stop) + // grab the root task folder + rootFolderVar, err := oleutil.CallMethod(scheduler, "GetFolder", `\`) + if err != nil { + return fmt.Errorf("failed to get root folder: %w", err) + } - if err := backoff.WaitFor(func() error { - if err = existingService.Delete(); err != nil { - return err - } + rootFolder := rootFolderVar.ToIDispatch() + defer rootFolder.Release() - return nil - }, 3*time.Second, 500*time.Millisecond); err != nil { - return fmt.Errorf("timed out attempting service deletion: %w", err) + // remove the task + _, err = oleutil.CallMethod(rootFolder, "DeleteTask", taskName, 0) + if err != nil { + return fmt.Errorf("failed to delete task %s: %w", taskName, 
err) } return nil } -func (wc *WatchdogController) restartService(service *mgr.Service) error { - status, err := service.Control(svc.Stop) +// watchdogTaskExists connects with the scheduler service to determine whether +// a watchdog task for the given identifier is installed on the device +func watchdogTaskExists(identifier string) (bool, error) { + if strings.TrimSpace(identifier) == "" { + identifier = launcher.DefaultLauncherIdentifier + } + + taskName := launcher.TaskName(identifier, watchdogTaskType) + // init COM - we discard the error returned by CoInitialize because it + // harmlessly returns S_FALSE if we call it more than once + ole.CoInitialize(0) + defer ole.CoUninitialize() + + // create our scheduler object + schedService, err := oleutil.CreateObject("Schedule.Service") if err != nil { - wc.slogger.Log(context.TODO(), slog.LevelWarn, - "error stopping service", - "err", err, - ) + return false, fmt.Errorf("creating schedule service object: %w", err) + } + defer schedService.Release() + + // get service interface handle + scheduler, err := schedService.QueryInterface(ole.IID_IDispatch) + if err != nil { + return false, err + } + defer scheduler.Release() - // always attempt to start the service regardless, if the service was already - // stopped it will still err on the control (stop) call above - return service.Start() + // connect to the scheduler handle on the local machine + _, err = oleutil.CallMethod(scheduler, "Connect") + if err != nil { + return false, fmt.Errorf("failed to connect to Task Scheduler: %w", err) } - if err := backoff.WaitFor(func() error { - status, err = service.Query() - if err != nil { - return fmt.Errorf("could not retrieve service status: %w", err) - } + // grab the root task folder + rootFolderVar, err := oleutil.CallMethod(scheduler, "GetFolder", `\`) + if err != nil { + return false, fmt.Errorf("failed to get root folder: %w", err) + } - if status.State != svc.Stopped { - return fmt.Errorf("service has not stopped") - } 
+ rootFolder := rootFolderVar.ToIDispatch() + defer rootFolder.Release() - return nil - }, 10*time.Second, 500*time.Millisecond); err != nil { - return fmt.Errorf("timed out waiting for %s service to stop: %w", service.Name, err) + taskObj, err := oleutil.CallMethod(rootFolder, "GetTask", taskName) + // this will fail with a generic "Exception Occurred" message if the task does not exist + if err != nil { + return false, nil } - return service.Start() + taskObj.ToIDispatch().Release() + + return true, nil } diff --git a/ee/watchdog/controller_windows_test.go b/ee/watchdog/controller_windows_test.go index fdd42fefc..957f66f7c 100644 --- a/ee/watchdog/controller_windows_test.go +++ b/ee/watchdog/controller_windows_test.go @@ -21,9 +21,10 @@ func TestInterrupt_Multiple(t *testing.T) { mockKnapsack := typesmocks.NewKnapsack(t) mockKnapsack.On("RootDirectory").Return(tempRootDir) mockKnapsack.On("Slogger").Return(testSlogger) - mockKnapsack.On("LauncherWatchdogEnabled").Return(false) + mockKnapsack.On("Identifier").Return("kolide-k2") + mockKnapsack.On("KolideServerURL").Return("k2device.kolide.com") - controller, _ := NewController(context.TODO(), mockKnapsack) + controller, _ := NewController(context.TODO(), mockKnapsack, "") // Let the handler run for a bit go controller.Run() diff --git a/ee/watchdog/watchdog_service_windows.go b/ee/watchdog/watchdog_service_windows.go deleted file mode 100644 index de2cfb780..000000000 --- a/ee/watchdog/watchdog_service_windows.go +++ /dev/null @@ -1,290 +0,0 @@ -//go:build windows -// +build windows - -package watchdog - -import ( - "context" - "fmt" - "log/slog" - "os" - "time" - - "github.com/kolide/kit/version" - agentsqlite "github.com/kolide/launcher/ee/agent/storage/sqlite" - "github.com/kolide/launcher/ee/gowrapper" - "github.com/kolide/launcher/ee/powereventwatcher" - "github.com/kolide/launcher/pkg/launcher" - "github.com/kolide/launcher/pkg/log/multislogger" - "github.com/kolide/launcher/pkg/rungroup" - 
"github.com/pkg/errors" - "golang.org/x/sys/windows" - "golang.org/x/sys/windows/svc" - "golang.org/x/sys/windows/svc/mgr" -) - -type winWatchdogSvc struct { - systemSlogger, slogger *multislogger.MultiSlogger - opts *launcher.Options - sleepStateUpdater *powereventwatcher.InMemorySleepStateUpdater - // cachedInModernStandby is held for comparison against the current value - // from sleepStateUpdater, to allow us to trigger a healthcheck - // more frequently than the routine timer value when waking from modern standby - cachedInModernStandby bool -} - -func RunWatchdogService(systemSlogger *multislogger.MultiSlogger, args []string) error { - ctx := context.TODO() - systemSlogger.Logger = systemSlogger.Logger.With( - "service", launcherWatchdogServiceName, - "version", version.Version().Version, - ) - - systemSlogger.Log(ctx, slog.LevelInfo, "watchdog service start requested") - - opts, err := launcher.ParseOptions("", os.Args[2:]) - if err != nil { - systemSlogger.Log(ctx, slog.LevelError, - "error parsing options", - "err", err, - ) - - return fmt.Errorf("parsing options: %w", err) - } - - localSlogger := multislogger.New() - - // Create a local logger to drop logs into the sqlite DB. 
These will be collected and published - // to debug.json from the primary launcher invocation - if opts.RootDirectory != "" { - logWriter, err := agentsqlite.OpenRW(ctx, opts.RootDirectory, agentsqlite.WatchdogLogStore) - if err != nil { - return fmt.Errorf("opening log db in %s: %w", opts.RootDirectory, err) - } - - defer logWriter.Close() - - localSloggerHandler := slog.NewJSONHandler(logWriter, &slog.HandlerOptions{Level: slog.LevelDebug}) - - // add the sqlite handler to both local and systemSloggers - localSlogger.AddHandler(localSloggerHandler) - systemSlogger.AddHandler(localSloggerHandler) - } - - localSlogger.Logger = localSlogger.Logger.With( - "service", launcherWatchdogServiceName, - "version", version.Version().Version, - ) - - sleepStateUpdater := powereventwatcher.NewInMemorySleepStateUpdater(localSlogger.Logger) - - // Log panics from the windows service - defer func() { - if r := recover(); r != nil { - systemSlogger.Log(ctx, slog.LevelError, - "panic occurred in watchdog service", - "err", r, - ) - if err, ok := r.(error); ok { - systemSlogger.Log(ctx, slog.LevelError, - "watchdog service panic stack trace", - "stack_trace", fmt.Sprintf("%+v", errors.WithStack(err)), - ) - } - time.Sleep(time.Second) - } - }() - - if err := svc.Run(launcherWatchdogServiceName, &winWatchdogSvc{ - systemSlogger: systemSlogger, - slogger: localSlogger, - opts: opts, - sleepStateUpdater: sleepStateUpdater, - }); err != nil { - systemSlogger.Log(ctx, slog.LevelError, - "error in service run", - "err", err, - ) - time.Sleep(time.Second) - return err - } - - systemSlogger.Log(ctx, slog.LevelInfo, "service exited") - time.Sleep(time.Second) - - return nil -} - -func (w *winWatchdogSvc) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown - changes <- svc.Status{State: svc.StartPending} 
- w.slogger.Log(ctx, slog.LevelInfo, "executing windows service") - changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted} - - runRestartServiceResults := make(chan struct{}) - - gowrapper.Go(ctx, w.systemSlogger.Logger, func() { - if err := runLauncherWatchdogService(ctx, w); err != nil { - w.systemSlogger.Log(ctx, slog.LevelInfo, - "runLauncherWatchdogService exited", - "err", err, - "stack_trace", fmt.Sprintf("%+v", errors.WithStack(err)), - ) - } else { - w.systemSlogger.Log(ctx, slog.LevelInfo, - "runLauncher exited cleanly", - ) - } - - // signal to fully exit so that the service manager can restart the service - runRestartServiceResults <- struct{}{} - }, func(r any) { - w.systemSlogger.Log(ctx, slog.LevelError, - "exiting after runLauncherWatchdogService panic", - "err", r, - ) - - // signal to fully exit so that the service manager can restart the service. - runRestartServiceResults <- struct{}{} - }) - - for { - select { - case c := <-r: - switch c.Cmd { - case svc.Interrogate: - changes <- c.CurrentStatus - // Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4 - time.Sleep(100 * time.Millisecond) - changes <- c.CurrentStatus - case svc.Stop, svc.Shutdown: - w.systemSlogger.Log(ctx, slog.LevelInfo, "shutdown request received") - changes <- svc.Status{State: svc.StopPending} - cancel() - time.Sleep(1 * time.Second) // give checker routine enough time to shut down - changes <- svc.Status{State: svc.Stopped, Accepts: cmdsAccepted} - return ssec, errno - default: - w.systemSlogger.Log(ctx, slog.LevelInfo, - "unexpected change request", - "service", launcherWatchdogServiceName, - "change_request", fmt.Sprintf("%+v", c), - ) - } - case <-runRestartServiceResults: - w.systemSlogger.Log(ctx, slog.LevelInfo, - "shutting down restart service after exit", - ) - // We don't want to tell the service manager that we've stopped on purpose, - // so that the service manager will restart the watchdog correctly. 
- // We use this error code largely because the windows/svc code also uses it - // and it seems semantically correct enough; it doesn't appear to matter to us - // what the code is. - return false, uint32(windows.ERROR_EXCEPTION_IN_SERVICE) - } - } -} - -func (w *winWatchdogSvc) checkLauncherStatus(ctx context.Context) error { - if w.sleepStateUpdater.InModernStandby() { - return nil - } - - serviceManager, err := mgr.Connect() - if err != nil { - w.slogger.Log(ctx, slog.LevelError, - "connecting to service control manager", - "err", err, - ) - - return err - } - - defer serviceManager.Disconnect() - - launcherService, err := serviceManager.OpenService(launcherServiceName) - if err != nil { - return fmt.Errorf("opening launcher service: %w", err) - } - - defer launcherService.Close() - - currentStatus, err := launcherService.Query() - if err != nil { - return fmt.Errorf("checking current launcher status: %w", err) - } - - if currentStatus.State == svc.Stopped { - w.slogger.Log(ctx, slog.LevelInfo, "watchdog service checker detected stopped state, restarting") - return launcherService.Start() - } - - return nil -} - -func runLauncherWatchdogService(ctx context.Context, w *winWatchdogSvc) error { - // create a rungroup for all the actors we create to allow for easy start/stop - runGroup := rungroup.NewRunGroup() - runGroup.SetSlogger(w.slogger.Logger) - powerEventWatcher, err := powereventwatcher.New(ctx, w.slogger.Logger, w.sleepStateUpdater) - if err != nil { - w.slogger.Log(ctx, slog.LevelDebug, - "could not init power event watcher", - "err", err, - ) - } else { - runGroup.Add("powerEventWatcher", powerEventWatcher.Execute, powerEventWatcher.Interrupt) - } - - go runLauncherWatchdogStatusChecker(ctx, w) - - if err := runGroup.Run(); err != nil { - return fmt.Errorf("err from watchdog runGroup: %w", err) - } - - return nil -} - -func runLauncherWatchdogStatusChecker(ctx context.Context, w *winWatchdogSvc) error { - // to avoid constantly hitting windows service 
manager we run off of two timers: - // 1. a longer (routine) timer which always checks the current status - // 2. a shorter (sleepState) timer which will only trigger a check if we've recently - // woken up from modern standby - routineTicker := time.NewTicker(15 * time.Minute) - sleepStateTicker := time.NewTicker(1 * time.Minute) - - for { - select { - case <-routineTicker.C: - if err := w.checkLauncherStatus(ctx); err != nil { - w.slogger.Log(ctx, slog.LevelError, - "failure checking launcher health status", - "err", err, - ) - } - case <-sleepStateTicker.C: - // if our last reading was in modern standby, but our current reading is awake, - // trigger the status check immediately - shouldCheckStatusNow := w.cachedInModernStandby && !w.sleepStateUpdater.InModernStandby() - // always persist the cached value for the next iteration, this must be done here before - // the checkLauncherStatus call to ensure we're operating off up to date sleep status there - w.cachedInModernStandby = w.sleepStateUpdater.InModernStandby() - if shouldCheckStatusNow { - if err := w.checkLauncherStatus(ctx); err != nil { - w.slogger.Log(ctx, slog.LevelError, - "failure checking launcher health status after detecting wake state", - "err", err, - ) - } - } - - case <-ctx.Done(): - routineTicker.Stop() - sleepStateTicker.Stop() - return ctx.Err() - } - } -} diff --git a/ee/watchdog/watchdog_service_other.go b/ee/watchdog/watchdog_task_other.go similarity index 71% rename from ee/watchdog/watchdog_service_other.go rename to ee/watchdog/watchdog_task_other.go index 38a0bec60..6c79e068f 100644 --- a/ee/watchdog/watchdog_service_other.go +++ b/ee/watchdog/watchdog_task_other.go @@ -9,6 +9,6 @@ import ( "github.com/kolide/launcher/pkg/log/multislogger" ) -func RunWatchdogService(_ *multislogger.MultiSlogger, args []string) error { +func RunWatchdogTask(_ *multislogger.MultiSlogger, args []string) error { return errors.New("not implemented on non windows platforms") } diff --git 
a/ee/watchdog/watchdog_task_windows.go b/ee/watchdog/watchdog_task_windows.go new file mode 100644 index 000000000..ab44d2da7 --- /dev/null +++ b/ee/watchdog/watchdog_task_windows.go @@ -0,0 +1,140 @@ +//go:build windows +// +build windows + +package watchdog + +import ( + "context" + "flag" + "fmt" + "log/slog" + + "github.com/kolide/kit/version" + agentsqlite "github.com/kolide/launcher/ee/agent/storage/sqlite" + "github.com/kolide/launcher/pkg/launcher" + "github.com/kolide/launcher/pkg/log/multislogger" + "github.com/peterbourgon/ff/v3" + "golang.org/x/sys/windows/svc" + "golang.org/x/sys/windows/svc/mgr" +) + +// RunWatchdogTask is typically run as a check to determine the health of launcher and restart if required. +// it is installed as an exec action via windows scheduled task. e.g. C:\path\to\launcher.exe watchdog -config . +// you can alternatively run this subcommand to install or remove the scheduled task via the --install-task or --remove-task flags +func RunWatchdogTask(systemSlogger *multislogger.MultiSlogger, args []string) error { + launcher.DefaultAutoupdate = true + launcher.SetDefaultPaths() + + var ( + flagset = flag.NewFlagSet("watchdog", flag.ExitOnError) + flInstallTask = flagset.Bool("install-task", false, "install the watchdog as a scheduled task") + flRemoveTask = flagset.Bool("remove-task", false, "remove the watchdog as a scheduled task") + flConfigFilePath = flagset.String("config", launcher.DefaultConfigFilePath, "config file to parse options from (optional)") + ) + + // note that we don't intend to parse the config file here, just the config file path to pass to launcher's ParseOptions + ff.Parse(flagset, args) + + // pass the config file through our standard options parsing to get all default options + opts, err := launcher.ParseOptions("watchdog", []string{"-config", *flConfigFilePath}) + if err != nil { + return fmt.Errorf("parsing watchdog options: %w", err) + } + + localSlogger := multislogger.New() + + ctx := context.TODO() + 
launcherWatchdogTaskName := launcher.TaskName(opts.Identifier, watchdogTaskType) + systemSlogger.Logger = systemSlogger.Logger.With( + "task", launcherWatchdogTaskName, + "version", version.Version().Version, + ) + + // Create a local logger to drop logs into the sqlite DB. These will be collected and published + // to debug.json from the primary launcher invocation + if opts.RootDirectory != "" { + logWriter, err := agentsqlite.OpenRW(ctx, opts.RootDirectory, agentsqlite.WatchdogLogStore) + if err != nil { + return fmt.Errorf("opening log db in %s: %w", opts.RootDirectory, err) + } + + defer logWriter.Close() + + localSloggerHandler := slog.NewJSONHandler(logWriter, &slog.HandlerOptions{Level: slog.LevelDebug}) + + // add the sqlite handler to both local and systemSloggers + localSlogger.AddHandler(localSloggerHandler) + systemSlogger.AddHandler(localSloggerHandler) + } + + localSlogger.Logger = localSlogger.Logger.With( + "task", launcherWatchdogTaskName, + "version", version.Version().Version, + ) + + if *flInstallTask { + if err := installWatchdogTask(opts.Identifier, opts.ConfigFilePath); err != nil { + localSlogger.Log(ctx, slog.LevelWarn, + "encountered error attempting watchdog install from CLI", + "err", err, + ) + + return err + } + + return nil + } + + if *flRemoveTask { + if err := RemoveWatchdogTask(opts.Identifier); err != nil { + localSlogger.Log(ctx, slog.LevelWarn, + "encountered error attempting watchdog removal from CLI", + "err", err, + ) + + return err + } + + return nil + } + + localSlogger.Log(ctx, slog.LevelDebug, "watchdog check requested") + + launcherServiceName := launcher.ServiceName(opts.Identifier) + if err := ensureServiceRunning(ctx, localSlogger.Logger, launcherServiceName); err != nil { + localSlogger.Log(ctx, slog.LevelWarn, + "encountered error ensuring service run state", + "err", err, + ) + } + + return nil +} + +func ensureServiceRunning(ctx context.Context, slogger *slog.Logger, serviceName string) error { + serviceManager, 
err := mgr.Connect() + if err != nil { + return fmt.Errorf("connecting to service control manager: %w", err) + } + + defer serviceManager.Disconnect() + + launcherService, err := serviceManager.OpenService(serviceName) + if err != nil { + return fmt.Errorf("opening launcher service: %w", err) + } + + defer launcherService.Close() + + currentStatus, err := launcherService.Query() + if err != nil { + return fmt.Errorf("checking current launcher status: %w", err) + } + + if currentStatus.State == svc.Stopped { + slogger.Log(ctx, slog.LevelInfo, "watchdog checker detected stopped state, restarting") + return launcherService.Start() + } + + return nil +} diff --git a/pkg/launcher/paths.go b/pkg/launcher/paths.go index b3d23bf04..af1d68ea1 100644 --- a/pkg/launcher/paths.go +++ b/pkg/launcher/paths.go @@ -1,6 +1,7 @@ package launcher import ( + "fmt" "os" "path/filepath" "runtime" @@ -156,3 +157,34 @@ func nonEmptyFileExists(path string) (bool, error) { return fileInfo.Size() > 0, nil } + +// GetOriginalLauncherExecutablePath is a convenience function to determine and verify the location of +// the originally installed launcher executable. it uses the identifier to generate the expected path and +// verifies file presence before returning the path. 
this is currently in use for task installation +// on windows platforms +// Note: this will not work for NixOS, we should revisit if we end up with a use case there +func GetOriginalLauncherExecutablePath(identifier string) (string, error) { + if strings.TrimSpace(identifier) == "" { + identifier = DefaultLauncherIdentifier + } + + var binDirBase string + var launcherExeName string + + switch runtime.GOOS { + case "windows": + binDirBase = fmt.Sprintf(`C:\Program Files\Kolide\Launcher-%s\bin`, identifier) + launcherExeName = "launcher.exe" + default: + binDirBase = fmt.Sprintf(`/usr/local/%s/bin`, identifier) + launcherExeName = "launcher" + } + + launcherBin := filepath.Join(binDirBase, launcherExeName) + // do some basic sanity checking to prevent installation from a bad path + if exists, err := nonEmptyFileExists(launcherBin); err != nil || !exists { + return "", err + } + + return launcherBin, nil +} diff --git a/pkg/launcher/pkg_utils_windows.go b/pkg/launcher/pkg_utils_windows.go index d90a9fb33..a4d266205 100644 --- a/pkg/launcher/pkg_utils_windows.go +++ b/pkg/launcher/pkg_utils_windows.go @@ -25,3 +25,16 @@ func ServiceName(identifier string) string { sanitizedServiceName = fmt.Sprintf("launcher_%s_svc", sanitizedServiceName) // wrapped as launcher_kolide_k2_svc return snaker.SnakeToCamel(sanitizedServiceName) // will produce LauncherKolideK2Svc } + +// TaskName embeds the given identifier into our task name template after sanitization, +// and returns the camelCased service name generated to match our packaging logic +func TaskName(identifier, taskType string) string { + if strings.TrimSpace(identifier) == "" { + identifier = DefaultLauncherIdentifier + } + + sanitizedIdentifier := nonAlphanumericRegex.ReplaceAllString(identifier, "_") // e.g. identifier=kolide-k2 becomes kolide_k2 + sanitizedTaskType := nonAlphanumericRegex.ReplaceAllString(taskType, "_") // e.g. 
taskName=watchdog + sanitizedTaskName := fmt.Sprintf("launcher_%s_%s_task", sanitizedIdentifier, sanitizedTaskType) // wrapped as launcher_kolide_k2_watchdog_task + return snaker.SnakeToCamel(sanitizedTaskName) // will produce LauncherKolideK2WatchdogTask +} diff --git a/pkg/launcher/pkg_utils_windows_test.go b/pkg/launcher/pkg_utils_windows_test.go index 63866489f..1e6edd900 100644 --- a/pkg/launcher/pkg_utils_windows_test.go +++ b/pkg/launcher/pkg_utils_windows_test.go @@ -47,3 +47,47 @@ func Test_ServiceName(t *testing.T) { }) } } + +func Test_TaskName(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + testCaseName string + identifier string + taskType string + expectedTaskName string + }{ + { + testCaseName: "empty identifier expecting default task name", + identifier: " ", + taskType: "watchdog", + expectedTaskName: "LauncherKolideK2WatchdogTask", + }, + { + testCaseName: "default identifier expecting default task name", + identifier: "kolide-k2", + taskType: "watchdog", + expectedTaskName: "LauncherKolideK2WatchdogTask", + }, + { + testCaseName: "preprod identifier expecting preprod task name", + identifier: "kolide-nababe-k2", + taskType: "watchdog", + expectedTaskName: "LauncherKolideNababeK2WatchdogTask", + }, + { + testCaseName: "mangled identifier expecting default task name", + identifier: "kolide-!@_k2", + taskType: "watchdog", + expectedTaskName: "LauncherKolideK2WatchdogTask", + }, + } { + tt := tt + t.Run(tt.testCaseName, func(t *testing.T) { + t.Parallel() + + serviceName := TaskName(tt.identifier, tt.taskType) + require.Equal(t, tt.expectedTaskName, serviceName, "expected sanitized service name value to match") + }) + } +}