From 4f0d26bb009ee71eee91b602c290631305e172ed Mon Sep 17 00:00:00 2001 From: Blake Rouse Date: Wed, 12 Jun 2024 09:47:32 -0400 Subject: [PATCH] Add `unprivileged` and `privileged` subcommand to Elastic Agent (#4621) * Work on privileged/unprivileged command. * Add integration tests for switching between unprivileged and privileged mode. * Fix upstream rename. * Add changelog. * Switch to new install privileged/unprivileged checks. * Adjust FixPermissions to take ownership back to Administrators. * Code review feedback. * Fix service component check. Support switching on macOS. * Fix lint. * Update to constant. * Add tests for unprivileged switch failure with endpoint installed. * Fix runtime check to keep runtime spec. * Fix test contains. * Only run test on linux. * Linux only, more. --- ...ileged-and-privileged-switch-commands.yaml | 34 +++++ internal/pkg/agent/cmd/common.go | 2 + internal/pkg/agent/cmd/enroll_cmd.go | 51 +------ internal/pkg/agent/cmd/inspect.go | 76 ++++++---- internal/pkg/agent/cmd/install.go | 2 +- internal/pkg/agent/cmd/privileged.go | 95 ++++++++++++ internal/pkg/agent/cmd/unprivileged.go | 141 ++++++++++++++++++ internal/pkg/agent/install/install.go | 138 ++++++++--------- internal/pkg/agent/install/install_unix.go | 2 +- internal/pkg/agent/install/install_windows.go | 24 ++- internal/pkg/agent/install/prereq.go | 57 +++++++ internal/pkg/agent/install/switch.go | 106 +++++++++++++ internal/pkg/agent/install/switch_darwin.go | 102 +++++++++++++ internal/pkg/agent/install/switch_other.go | 17 +++ internal/pkg/agent/install/uninstall.go | 61 ++++---- internal/pkg/agent/perms/windows.go | 4 +- pkg/component/component.go | 7 +- pkg/component/component_test.go | 8 +- pkg/component/load.go | 7 +- pkg/component/platforms.go | 16 ++ pkg/control/v2/client/wait/agent.go | 60 ++++++++ pkg/control/v2/client/wait/common.go | 44 ++++++ testing/integration/endpoint_security_test.go | 70 +++++++++ testing/integration/switch_privileged_test.go | 139 
+++++++++++++++++ .../integration/switch_unprivileged_test.go | 139 +++++++++++++++++ testing/integration/upgrade_rollback_test.go | 2 +- 26 files changed, 1179 insertions(+), 225 deletions(-) create mode 100644 changelog/fragments/1715266075-Add-unprivileged-and-privileged-switch-commands.yaml create mode 100644 internal/pkg/agent/cmd/privileged.go create mode 100644 internal/pkg/agent/cmd/unprivileged.go create mode 100644 internal/pkg/agent/install/prereq.go create mode 100644 internal/pkg/agent/install/switch.go create mode 100644 internal/pkg/agent/install/switch_darwin.go create mode 100644 internal/pkg/agent/install/switch_other.go create mode 100644 pkg/control/v2/client/wait/agent.go create mode 100644 pkg/control/v2/client/wait/common.go create mode 100644 testing/integration/switch_privileged_test.go create mode 100644 testing/integration/switch_unprivileged_test.go diff --git a/changelog/fragments/1715266075-Add-unprivileged-and-privileged-switch-commands.yaml b/changelog/fragments/1715266075-Add-unprivileged-and-privileged-switch-commands.yaml new file mode 100644 index 00000000000..2ab89cc0e65 --- /dev/null +++ b/changelog/fragments/1715266075-Add-unprivileged-and-privileged-switch-commands.yaml @@ -0,0 +1,34 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. 
+summary: Add unprivileged and privileged switch commands + +# Long description; in case the summary is not enough to describe the change +# this field accommodates a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +description: | + Adds ability to switch between privileged and unprivileged mode using the privileged and unprivileged + subcommands respectively. + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: + +# PR URL; optional; the PR number that added the changeset. +# If not present, it is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/elastic/elastic-agent/pull/4621 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present, it is automatically filled by the tooling with the issue linked to the PR number. 
+issue: https://github.com/elastic/ingest-dev/issues/2790 diff --git a/internal/pkg/agent/cmd/common.go b/internal/pkg/agent/cmd/common.go index 11b025f6c5b..5d4315b4692 100644 --- a/internal/pkg/agent/cmd/common.go +++ b/internal/pkg/agent/cmd/common.go @@ -77,6 +77,8 @@ func NewCommandWithArgs(args []string, streams *cli.IOStreams) *cobra.Command { cmd.AddCommand(newUpgradeCommandWithArgs(args, streams)) cmd.AddCommand(newEnrollCommandWithArgs(args, streams)) cmd.AddCommand(newInspectCommandWithArgs(args, streams)) + cmd.AddCommand(newPrivilegedCommandWithArgs(args, streams)) + cmd.AddCommand(newUnprivilegedCommandWithArgs(args, streams)) cmd.AddCommand(newWatchCommandWithArgs(args, streams)) cmd.AddCommand(newContainerCommand(args, streams)) cmd.AddCommand(newStatusCommand(args, streams)) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 863727d6879..4dd721cdef1 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -38,6 +38,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/release" "github.com/elastic/elastic-agent/internal/pkg/remote" "github.com/elastic/elastic-agent/pkg/control/v2/client" + "github.com/elastic/elastic-agent/pkg/control/v2/client/wait" "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/pkg/core/process" "github.com/elastic/elastic-agent/pkg/utils" @@ -335,7 +336,7 @@ func (c *enrollCmd) fleetServerBootstrap(ctx context.Context, persistentConfig m if err != nil { if !c.options.FleetServer.SpawnAgent { // wait longer to try and communicate with the Elastic Agent - err = waitForAgent(ctx, c.options.DaemonTimeout) + err = wait.ForAgent(ctx, c.options.DaemonTimeout) if err != nil { return "", errors.New("failed to communicate with elastic-agent daemon; is elastic-agent running?") } @@ -722,54 +723,6 @@ type waitResult struct { err error } -func waitForAgent(ctx context.Context, timeout time.Duration) error { - if 
timeout == 0 { - timeout = 1 * time.Minute - } - if timeout > 0 { - var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, timeout) - defer cancel() - } - maxBackoff := timeout - if maxBackoff <= 0 { - // indefinite timeout - maxBackoff = 10 * time.Minute - } - - resChan := make(chan waitResult) - innerCtx, innerCancel := context.WithCancel(context.Background()) - defer innerCancel() - go func() { - backOff := expBackoffWithContext(innerCtx, 1*time.Second, maxBackoff) - for { - backOff.Wait() - _, err := getDaemonState(innerCtx) - if errors.Is(err, context.Canceled) { - resChan <- waitResult{err: err} - return - } - if err == nil { - resChan <- waitResult{} - break - } - } - }() - - var res waitResult - select { - case <-ctx.Done(): - innerCancel() - res = <-resChan - case res = <-resChan: - } - - if res.err != nil { - return res.err - } - return nil -} - func waitForFleetServer(ctx context.Context, agentSubproc <-chan *os.ProcessState, log *logger.Logger, timeout time.Duration) (string, error) { if timeout == 0 { timeout = 2 * time.Minute diff --git a/internal/pkg/agent/cmd/inspect.go b/internal/pkg/agent/cmd/inspect.go index 6b9155cec21..271aa26eedc 100644 --- a/internal/pkg/agent/cmd/inspect.go +++ b/internal/pkg/agent/cmd/inspect.go @@ -252,43 +252,12 @@ func inspectComponents(ctx context.Context, cfgPath string, opts inspectComponen return err } - // Load the requirements before trying to load the configuration. These should always load - // even if the configuration is wrong. 
- platform, err := component.LoadPlatformDetail() - if err != nil { - return fmt.Errorf("failed to gather system information: %w", err) - } - specs, err := component.LoadRuntimeSpecs(paths.Components(), platform) - if err != nil { - return fmt.Errorf("failed to detect inputs and outputs: %w", err) - } - - isAdmin, err := utils.HasRoot() - if err != nil { - return fmt.Errorf("error checking for root/Administrator privileges: %w", err) - } - - m, lvl, err := getConfigWithVariables(ctx, l, cfgPath, opts.variablesWait, !isAdmin) + comps, err := getComponentsFromPolicy(ctx, l, cfgPath, opts.variablesWait) if err != nil { + // error already includes the context return err } - monitorFn, err := getMonitoringFn(ctx, m) - if err != nil { - return fmt.Errorf("failed to get monitoring: %w", err) - } - - agentInfo, err := info.NewAgentInfoWithLog(ctx, "error", false) - if err != nil { - return fmt.Errorf("could not load agent info: %w", err) - } - - // Compute the components from the computed configuration. - comps, err := specs.ToComponents(m, monitorFn, lvl, agentInfo) - if err != nil { - return fmt.Errorf("failed to render components: %w", err) - } - // Hide configuration unless toggled on. if !opts.showConfig { for i, comp := range comps { @@ -349,6 +318,47 @@ func inspectComponents(ctx context.Context, cfgPath string, opts inspectComponen return printComponents(allowed, blocked, streams) } +func getComponentsFromPolicy(ctx context.Context, l *logger.Logger, cfgPath string, variablesWait time.Duration, platformModifiers ...component.PlatformModifier) ([]component.Component, error) { + // Load the requirements before trying to load the configuration. These should always load + // even if the configuration is wrong. + platform, err := component.LoadPlatformDetail(platformModifiers...) 
+ if err != nil { + return nil, fmt.Errorf("failed to gather system information: %w", err) + } + specs, err := component.LoadRuntimeSpecs(paths.Components(), platform) + if err != nil { + return nil, fmt.Errorf("failed to detect inputs and outputs: %w", err) + } + + isAdmin, err := utils.HasRoot() + if err != nil { + return nil, fmt.Errorf("error checking for root/Administrator privileges: %w", err) + } + + m, lvl, err := getConfigWithVariables(ctx, l, cfgPath, variablesWait, !isAdmin) + if err != nil { + return nil, err + } + + monitorFn, err := getMonitoringFn(ctx, m) + if err != nil { + return nil, fmt.Errorf("failed to get monitoring: %w", err) + } + + agentInfo, err := info.NewAgentInfoWithLog(ctx, "error", false) + if err != nil { + return nil, fmt.Errorf("could not load agent info: %w", err) + } + + // Compute the components from the computed configuration. + comps, err := specs.ToComponents(m, monitorFn, lvl, agentInfo) + if err != nil { + return nil, fmt.Errorf("failed to render components: %w", err) + } + + return comps, nil +} + func getMonitoringFn(ctx context.Context, cfg map[string]interface{}) (component.GenerateMonitoringCfgFn, error) { config, err := config.NewConfigFrom(cfg) if err != nil { diff --git a/internal/pkg/agent/cmd/install.go b/internal/pkg/agent/cmd/install.go index cb58f2a8f7c..c6692d26946 100644 --- a/internal/pkg/agent/cmd/install.go +++ b/internal/pkg/agent/cmd/install.go @@ -230,7 +230,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error { defer func() { if err != nil { progBar.Describe("Stopping Service") - innerErr := install.StopService(topPath) + innerErr := install.StopService(topPath, install.DefaultStopTimeout, install.DefaultStopInterval) if innerErr != nil { progBar.Describe("Failed to Stop Service") } else { diff --git a/internal/pkg/agent/cmd/privileged.go b/internal/pkg/agent/cmd/privileged.go new file mode 100644 index 00000000000..6d76cbe0cfd --- /dev/null +++ b/internal/pkg/agent/cmd/privileged.go 
@@ -0,0 +1,95 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package cmd + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/spf13/cobra" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/install" + "github.com/elastic/elastic-agent/internal/pkg/cli" + "github.com/elastic/elastic-agent/pkg/control/v2/client/wait" + "github.com/elastic/elastic-agent/pkg/utils" +) + +func newPrivilegedCommandWithArgs(s []string, streams *cli.IOStreams) *cobra.Command { + cmd := &cobra.Command{ + Use: "privileged", + Short: "Switch installed Elastic Agent to run as privileged", + Long: `This command converts the installed Elastic Agent from running unprivileged to running as privileged. + +By default this command will ask for a confirmation before making this change. You can bypass the confirmation request +using the -f flag. This is not a zero downtime operation and will always stop the running Elastic Agent (if running). +It is possible that loss of metrics, logs, or data could occur during this window of time. The Elastic Agent +daemon will always be started (even if it was off to start). In the case that the Elastic Agent is already running +privileged it will still perform all the same work, including stopping and starting the Elastic Agent. 
+`, + Args: cobra.ExactArgs(0), + Run: func(c *cobra.Command, args []string) { + if err := privilegedCmd(streams, c); err != nil { + fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage()) + os.Exit(1) + } + }, + } + + cmd.Flags().BoolP("force", "f", false, "Do not prompt for confirmation") + cmd.Flags().DurationP("daemon-timeout", "", 0, "Timeout waiting for Elastic Agent daemon restart after the change is applied (-1 = no wait)") + + return cmd +} + +func privilegedCmd(streams *cli.IOStreams, cmd *cobra.Command) (err error) { + isAdmin, err := utils.HasRoot() + if err != nil { + return fmt.Errorf("unable to perform privileged command while checking for root/Administrator rights: %w", err) + } + if !isAdmin { + return fmt.Errorf("unable to perform privileged command, not executed with %s permissions", utils.PermissionUser) + } + + topPath := paths.Top() + daemonTimeout, _ := cmd.Flags().GetDuration("daemon-timeout") + force, _ := cmd.Flags().GetBool("force") + if !force { + confirm, err := cli.Confirm("This will restart the running Elastic Agent and convert it to run in privileged mode. 
Do you want to continue?", true) + if err != nil { + return fmt.Errorf("problem reading prompt response") + } + if !confirm { + return fmt.Errorf("privileged switch was cancelled by the user") + } + } + + pt := install.CreateAndStartNewSpinner(streams.Out, "Converting Elastic Agent to privileged...") + err = install.SwitchExecutingMode(topPath, pt, "", "") + if err != nil { + // error already adds context + return err + } + + // wait for the service + if daemonTimeout >= 0 { + pt.Describe("Waiting for running service") + ctx := handleSignal(context.Background()) // allowed to be cancelled + err = wait.ForAgent(ctx, daemonTimeout) + if err != nil { + if errors.Is(err, context.Canceled) { + pt.Describe("Cancelled waiting for running service") + return nil + } + pt.Describe("Failed waiting for running service") + return err + } + pt.Describe("Service is up and running") + } + + return nil +} diff --git a/internal/pkg/agent/cmd/unprivileged.go b/internal/pkg/agent/cmd/unprivileged.go new file mode 100644 index 00000000000..df97f029fa0 --- /dev/null +++ b/internal/pkg/agent/cmd/unprivileged.go @@ -0,0 +1,141 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package cmd + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/spf13/cobra" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/install" + "github.com/elastic/elastic-agent/internal/pkg/cli" + "github.com/elastic/elastic-agent/pkg/component" + "github.com/elastic/elastic-agent/pkg/control/v2/client/wait" + "github.com/elastic/elastic-agent/pkg/utils" +) + +func newUnprivilegedCommandWithArgs(s []string, streams *cli.IOStreams) *cobra.Command { + cmd := &cobra.Command{ + Use: "unprivileged", + Short: "Switch installed Elastic Agent to run as unprivileged", + Long: `This command converts the installed Elastic Agent from running privileged to running as unprivileged. + +By default this command will ask for a confirmation before making this change. You can bypass the confirmation request +using the -f flag. This is not a zero downtime operation and will always stop the running Elastic Agent (if running). +It is possible that loss of metrics, logs, or data could occur during this window of time. The Elastic Agent +daemon will always be started (even if it was off to start). In the case that the Elastic Agent is already running +unprivileged it will still perform all the same work, including stopping and starting the Elastic Agent. 
+`, + Args: cobra.ExactArgs(0), + Run: func(c *cobra.Command, args []string) { + if err := unprivilegedCmd(streams, c); err != nil { + fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage()) + os.Exit(1) + } + }, + } + + cmd.Flags().BoolP("force", "f", false, "Do not prompt for confirmation") + cmd.Flags().DurationP("daemon-timeout", "", 0, "Timeout waiting for Elastic Agent daemon restart after the change is applied (-1 = no wait)") + + return cmd +} + +func unprivilegedCmd(streams *cli.IOStreams, cmd *cobra.Command) (err error) { + isAdmin, err := utils.HasRoot() + if err != nil { + return fmt.Errorf("unable to perform unprivileged command while checking for root/Administrator rights: %w", err) + } + if !isAdmin { + return fmt.Errorf("unable to perform unprivileged command, not executed with %s permissions", utils.PermissionUser) + } + + // cannot switch to unprivileged when service components have issues + err = ensureNoServiceComponentIssues() + if err != nil { + // error already adds context + return err + } + + topPath := paths.Top() + daemonTimeout, _ := cmd.Flags().GetDuration("daemon-timeout") + force, _ := cmd.Flags().GetBool("force") + if !force { + confirm, err := cli.Confirm("This will restart the running Elastic Agent and convert it to run in unprivileged mode. 
Do you want to continue?", true) + if err != nil { + return fmt.Errorf("problem reading prompt response") + } + if !confirm { + return fmt.Errorf("unprivileged switch was cancelled by the user") + } + } + + pt := install.CreateAndStartNewSpinner(streams.Out, "Converting Elastic Agent to unprivileged...") + err = install.SwitchExecutingMode(topPath, pt, install.ElasticUsername, install.ElasticGroupName) + if err != nil { + // error already adds context + return err + } + + // wait for the service + if daemonTimeout >= 0 { + pt.Describe("Waiting for running service") + ctx := handleSignal(context.Background()) // allowed to be cancelled + err = wait.ForAgent(ctx, daemonTimeout) + if err != nil { + if errors.Is(err, context.Canceled) { + pt.Describe("Cancelled waiting for running service") + return nil + } + pt.Describe("Failed waiting for running service") + return err + } + pt.Describe("Service is up and running") + } + + return nil +} + +func ensureNoServiceComponentIssues() error { + ctx := context.Background() + l, err := newErrorLogger() + if err != nil { + return fmt.Errorf("failed to create error logger: %w", err) + } + // this forces the component calculation to always compute with no root + // this allows any runtime preventions to error for a component when it has a no root support + comps, err := getComponentsFromPolicy(ctx, l, paths.ConfigFile(), 0, forceNonRoot) + if err != nil { + return fmt.Errorf("failed to create component model from policy: %w", err) + } + var errs []error + for _, comp := range comps { + if comp.InputSpec == nil { + // no spec (safety net) + continue + } + if comp.InputSpec.Spec.Service == nil { + // not a service component, allowed to exist (even if it needs root) + continue + } + if comp.Err != nil { + // service component has an error (most likely because it cannot run without root) + errs = append(errs, fmt.Errorf("%s -> %w", comp.ID, comp.Err)) + } + } + if len(errs) > 0 { + return fmt.Errorf("unable to switch to unprivileged 
mode due to the following service based components having issues: %w", errors.Join(errs...)) + } + return nil +} + +func forceNonRoot(detail component.PlatformDetail) component.PlatformDetail { + detail.User.Root = false + return detail +} diff --git a/internal/pkg/agent/install/install.go b/internal/pkg/agent/install/install.go index 922bb85d8b6..ea9e4e350dc 100644 --- a/internal/pkg/agent/install/install.go +++ b/internal/pkg/agent/install/install.go @@ -11,6 +11,7 @@ import ( "path/filepath" "runtime" "strings" + "time" "github.com/jaypipes/ghw" "github.com/kardianos/service" @@ -31,6 +32,11 @@ const ( ElasticUsername = "elastic-agent-user" ElasticGroupName = "elastic-agent" + + // DefaultStopTimeout is the default stop timeout that can be used to stop a running daemon. + DefaultStopTimeout = 30 * time.Second + // DefaultStopInterval is the check interval to determine if the service has stopped. + DefaultStopInterval = 250 * time.Millisecond ) // Install installs Elastic Agent persistently on the system including creating and starting its service. 
@@ -66,40 +72,10 @@ func Install(cfgFile, topPath string, unprivileged bool, log *logp.Logger, pt *p if unprivileged { username = ElasticUsername groupName = ElasticGroupName - - // ensure required group - ownership.GID, err = FindGID(groupName) - if err != nil && !errors.Is(err, ErrGroupNotFound) { - return utils.FileOwner{}, fmt.Errorf("failed finding group %s: %w", groupName, err) - } - if errors.Is(err, ErrGroupNotFound) { - pt.Describe(fmt.Sprintf("Creating group %s", groupName)) - ownership.GID, err = CreateGroup(groupName) - if err != nil { - pt.Describe(fmt.Sprintf("Failed to create group %s", groupName)) - return utils.FileOwner{}, fmt.Errorf("failed to create group %s: %w", groupName, err) - } - pt.Describe(fmt.Sprintf("Successfully created group %s", groupName)) - } - - // ensure required user - ownership.UID, err = FindUID(username) - if err != nil && !errors.Is(err, ErrUserNotFound) { - return utils.FileOwner{}, fmt.Errorf("failed finding username %s: %w", username, err) - } - if errors.Is(err, ErrUserNotFound) { - pt.Describe(fmt.Sprintf("Creating user %s", username)) - ownership.UID, err = CreateUser(username, ownership.GID) - if err != nil { - pt.Describe(fmt.Sprintf("Failed to create user %s", username)) - return utils.FileOwner{}, fmt.Errorf("failed to create user %s: %w", username, err) - } - err = AddUserToGroup(username, groupName) - if err != nil { - pt.Describe(fmt.Sprintf("Failed to add user %s to group %s", username, groupName)) - return utils.FileOwner{}, fmt.Errorf("failed to add user %s to group %s: %w", username, groupName, err) - } - pt.Describe(fmt.Sprintf("Successfully created user %s", username)) + ownership, err = EnsureUserAndGroup(username, groupName, pt) + if err != nil { + // error context already added by EnsureUserAndGroup + return utils.FileOwner{}, err } } @@ -191,34 +167,11 @@ func Install(cfgFile, topPath string, unprivileged bool, log *logp.Logger, pt *p // install service pt.Describe("Installing service") - opts, err := 
withServiceOptions(username, groupName) + err = InstallService(topPath, ownership, username, groupName) if err != nil { pt.Describe("Failed to install service") - return ownership, fmt.Errorf("error getting service installation options: %w", err) - } - svc, err := newService(topPath, opts...) - if err != nil { - pt.Describe("Failed to install service") - return ownership, fmt.Errorf("error installing new service: %w", err) - } - err = svc.Install() - if err != nil { - pt.Describe("Failed to install service") - return ownership, errors.New( - err, - fmt.Sprintf("failed to install service (%s)", paths.ServiceName), - errors.M("service", paths.ServiceName)) - } - err = servicePostInstall(ownership) - if err != nil { - pt.Describe("Failed to configure service") - - // ignore error - _ = svc.Uninstall() - return ownership, errors.New( - err, - fmt.Sprintf("failed to configure service (%s)", paths.ServiceName), - errors.M("service", paths.ServiceName)) + // error context already added by InstallService + return ownership, err } pt.Describe("Installed service") @@ -387,31 +340,29 @@ func StartService(topPath string) error { // only starting the service, so no need to set the username and group to any value svc, err := newService(topPath) if err != nil { - return fmt.Errorf("error creating new service handler: %w", err) + return fmt.Errorf("error creating new service handler for start: %w", err) } err = svc.Start() if err != nil { - return errors.New( - err, - fmt.Sprintf("failed to start service (%s)", paths.ServiceName), - errors.M("service", paths.ServiceName)) + return fmt.Errorf("failed to start service (%s): %w", paths.ServiceName, err) } return nil } // StopService stops the installed service. 
-func StopService(topPath string) error { +func StopService(topPath string, timeout time.Duration, interval time.Duration) error { // only stopping the service, so no need to set the username and group to any value svc, err := newService(topPath) if err != nil { - return fmt.Errorf("error creating new service handler: %w", err) + return fmt.Errorf("error creating new service handler for stop: %w", err) } err = svc.Stop() if err != nil { - return errors.New( - err, - fmt.Sprintf("failed to stop service (%s)", paths.ServiceName), - errors.M("service", paths.ServiceName)) + return fmt.Errorf("failed to stop service (%s): %w", paths.ServiceName, err) + } + err = isStopped(timeout, interval, paths.ServiceName) + if err != nil { + return fmt.Errorf("failed to stop service (%s): %w", paths.ServiceName, err) } return nil } @@ -421,14 +372,11 @@ func RestartService(topPath string) error { // only restarting the service, so no need to set the username and group to any value svc, err := newService(topPath) if err != nil { - return fmt.Errorf("error creating new service handler: %w", err) + return fmt.Errorf("error creating new service handler for restart: %w", err) } err = svc.Restart() if err != nil { - return errors.New( - err, - fmt.Sprintf("failed to restart service (%s)", paths.ServiceName), - errors.M("service", paths.ServiceName)) + return fmt.Errorf("failed to restart service (%s): %w", paths.ServiceName, err) } return nil } @@ -437,11 +385,47 @@ func RestartService(topPath string) error { func StatusService(topPath string) (service.Status, error) { svc, err := newService(topPath) if err != nil { - return service.StatusUnknown, err + return service.StatusUnknown, fmt.Errorf("error creating new service handler for status: %w", err) } return svc.Status() } +// InstallService installs the service. 
+func InstallService(topPath string, ownership utils.FileOwner, username string, groupName string) error { + opts, err := withServiceOptions(username, groupName) + if err != nil { + return fmt.Errorf("error getting service installation options: %w", err) + } + svc, err := newService(topPath, opts...) + if err != nil { + return fmt.Errorf("error creating new service handler for install: %w", err) + } + err = svc.Install() + if err != nil { + return fmt.Errorf("failed to install service (%s): %w", paths.ServiceName, err) + } + err = serviceConfigure(ownership) + if err != nil { + // ignore error + _ = svc.Uninstall() + return fmt.Errorf("failed to configure service (%s): %w", paths.ServiceName, err) + } + return nil +} + +// UninstallService uninstalls the service. +func UninstallService(topPath string) error { + svc, err := newService(topPath) + if err != nil { + return fmt.Errorf("error creating new service handler for uninstall: %w", err) + } + err = svc.Uninstall() + if err != nil { + return fmt.Errorf("failed to uninstall service (%s): %w", paths.ServiceName, err) + } + return nil +} + // findDirectory returns the directory to copy into the installation location. // // This also verifies that the discovered directory is a valid directory for installation. 
diff --git a/internal/pkg/agent/install/install_unix.go b/internal/pkg/agent/install/install_unix.go index 8bc679060a5..e24e8807b57 100644 --- a/internal/pkg/agent/install/install_unix.go +++ b/internal/pkg/agent/install/install_unix.go @@ -32,7 +32,7 @@ func withServiceOptions(username string, groupName string) ([]serviceOpt, error) return []serviceOpt{withUserGroup(username, groupName)}, nil } -func servicePostInstall(ownership utils.FileOwner) error { +func serviceConfigure(ownership utils.FileOwner) error { // do nothing on unix return nil } diff --git a/internal/pkg/agent/install/install_windows.go b/internal/pkg/agent/install/install_windows.go index 3790532b7fa..89689a8d66e 100644 --- a/internal/pkg/agent/install/install_windows.go +++ b/internal/pkg/agent/install/install_windows.go @@ -75,24 +75,20 @@ func withServiceOptions(username string, groupName string) ([]serviceOpt, error) return []serviceOpt{withUserGroup(username, groupName), withPassword(password)}, nil } -// servicePostInstall sets the security descriptor for the service +// serviceConfigure sets the security descriptor for the service // // gives user the ability to control the service, needed when installed with --unprivileged or // ReExec is not possible on Windows. 
-func servicePostInstall(ownership utils.FileOwner) error { - if ownership.UID == "" { - // no user, running with LOCAL SYSTEM (do nothing) - return nil - } - +func serviceConfigure(ownership utils.FileOwner) error { // https://learn.microsoft.com/en-us/windows-hardware/drivers/kernel/sddl-for-device-objects - securityDescriptor, err := windows.SecurityDescriptorFromString( - "D:(A;;GA;;;SY)" + // SDDL_LOCAL_SYSTEM -> SDDL_GENERIC_ALL - "(A;;GA;;;BA)" + // SDDL_BUILTIN_ADMINISTRATORS -> SDDL_GENERIC_ALL - "(A;;GR;;;WD)" + // SDDL_EVERYONE -> SDDL_GENERIC_READ - "(A;;GRGX;;;NS)" + // SDDL_NETWORK_SERVICE -> SDDL_GENERIC_READ|SDDL_GENERIC_EXECUTE - fmt.Sprintf("(A;;GA;;;%s)", ownership.UID), // Ownership UID -> SDDL_GENERIC_ALL - ) + sddl := "D:(A;;GA;;;SY)" + // SDDL_LOCAL_SYSTEM -> SDDL_GENERIC_ALL + "(A;;GA;;;BA)" + // SDDL_BUILTIN_ADMINISTRATORS -> SDDL_GENERIC_ALL + "(A;;GR;;;WD)" + // SDDL_EVERYONE -> SDDL_GENERIC_READ + "(A;;GRGX;;;NS)" // SDDL_NETWORK_SERVICE -> SDDL_GENERIC_READ|SDDL_GENERIC_EXECUTE + if ownership.UID != "" { + sddl += fmt.Sprintf("(A;;GA;;;%s)", ownership.UID) // Ownership UID -> SDDL_GENERIC_ALL + } + securityDescriptor, err := windows.SecurityDescriptorFromString(sddl) if err != nil { return fmt.Errorf("failed to build security descriptor from SSDL: %w", err) } diff --git a/internal/pkg/agent/install/prereq.go b/internal/pkg/agent/install/prereq.go new file mode 100644 index 00000000000..ca7f56dcafd --- /dev/null +++ b/internal/pkg/agent/install/prereq.go @@ -0,0 +1,57 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package install + +import ( + "errors" + "fmt" + + "github.com/schollz/progressbar/v3" + + "github.com/elastic/elastic-agent/pkg/utils" +) + +// EnsureUserAndGroup creates the given username and group returning the file ownership information for that +// user and group. +func EnsureUserAndGroup(username string, groupName string, pt *progressbar.ProgressBar) (utils.FileOwner, error) { + var err error + var ownership utils.FileOwner + + // ensure required group + ownership.GID, err = FindGID(groupName) + if err != nil && !errors.Is(err, ErrGroupNotFound) { + return utils.FileOwner{}, fmt.Errorf("failed finding group %s: %w", groupName, err) + } + if errors.Is(err, ErrGroupNotFound) { + pt.Describe(fmt.Sprintf("Creating group %s", groupName)) + ownership.GID, err = CreateGroup(groupName) + if err != nil { + pt.Describe(fmt.Sprintf("Failed to create group %s", groupName)) + return utils.FileOwner{}, fmt.Errorf("failed to create group %s: %w", groupName, err) + } + pt.Describe(fmt.Sprintf("Successfully created group %s", groupName)) + } + + // ensure required user + ownership.UID, err = FindUID(username) + if err != nil && !errors.Is(err, ErrUserNotFound) { + return utils.FileOwner{}, fmt.Errorf("failed finding username %s: %w", username, err) + } + if errors.Is(err, ErrUserNotFound) { + pt.Describe(fmt.Sprintf("Creating user %s", username)) + ownership.UID, err = CreateUser(username, ownership.GID) + if err != nil { + pt.Describe(fmt.Sprintf("Failed to create user %s", username)) + return utils.FileOwner{}, fmt.Errorf("failed to create user %s: %w", username, err) + } + err = AddUserToGroup(username, groupName) + if err != nil { + pt.Describe(fmt.Sprintf("Failed to add user %s to group %s", username, groupName)) + return utils.FileOwner{}, fmt.Errorf("failed to add user %s to group %s: %w", username, groupName, err) + } + pt.Describe(fmt.Sprintf("Successfully created user %s", username)) + } + return ownership, nil +} diff --git 
a/internal/pkg/agent/install/switch.go b/internal/pkg/agent/install/switch.go
new file mode 100644
index 00000000000..8412ce28cb2
--- /dev/null
+++ b/internal/pkg/agent/install/switch.go
@@ -0,0 +1,113 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+package install
+
+import (
+	"fmt"
+
+	"github.com/kardianos/service"
+	"github.com/schollz/progressbar/v3"
+
+	"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
+	"github.com/elastic/elastic-agent/internal/pkg/agent/perms"
+	"github.com/elastic/elastic-agent/pkg/utils"
+)
+
+// SwitchExecutingMode switches the executing mode of the installed Elastic Agent.
+//
+// When username and groupName are blank then it switches back to root/Administrator and when a username/groupName is
+// provided then it switches to running with that username and groupName.
+//
+// The service is stopped (when running), re-created for the new ownership and then started. If a step fails
+// and the service was running when this was called, an attempt is made to start the service again.
+func SwitchExecutingMode(topPath string, pt *progressbar.ProgressBar, username string, groupName string) error {
+	// ensure service is stopped
+	status, err := EnsureStoppedService(topPath, pt)
+	if err != nil {
+		// context for the error already provided in the EnsureStoppedService function
+		return err
+	}
+
+	// ensure that upon exit of this function that the service is always placed back to running, in the case
+	// that it was running when the command was executed
+	defer func() {
+		if err != nil && status == service.StatusRunning {
+			_ = StartService(topPath)
+		}
+	}()
+
+	// ensure user/group are created
+	var ownership utils.FileOwner
+	if username != "" && groupName != "" {
+		ownership, err = EnsureUserAndGroup(username, groupName, pt)
+		if err != nil {
+			// context for the error already provided in the EnsureUserAndGroup function
+			return err
+		}
+	}
+
+	// **start critical section**
+	// after this point changes will be made that can leave the installed Elastic Agent broken if they do not
+	// complete successfully
+
+	// perform platform specific work
+	err = switchPlatformMode(pt, ownership)
+	if err != nil {
+		// context for the error already provided in the switchPlatformMode function
+		return err
+	}
+
+	// fix all permissions to use the new ownership
+	pt.Describe("Adjusting permissions")
+	err = perms.FixPermissions(topPath, perms.WithOwnership(ownership))
+	if err != nil {
+		return fmt.Errorf("failed to perform permission changes on path %s: %w", topPath, err)
+	}
+	if paths.ShellWrapperPath != "" {
+		err = perms.FixPermissions(paths.ShellWrapperPath, perms.WithOwnership(ownership))
+		if err != nil {
+			return fmt.Errorf("failed to perform permission changes on path %s: %w", paths.ShellWrapperPath, err)
+		}
+	}
+
+	// the service has to be uninstalled
+	pt.Describe("Removing service")
+	// error is ignored because it's possible that it's already uninstalled
+	//
+	// this can happen if this action failed in the middle of this critical section, so to allow the
+	// command to be called again we don't error on the uninstall
+	//
+	// the install error below will include an error about the service still existing if this failed
+	// to uninstall (really this should never fail, but the unexpected can happen)
+	_ = UninstallService(topPath)
+
+	// re-install service
+	pt.Describe("Installing service")
+	err = InstallService(topPath, ownership, username, groupName)
+	if err != nil {
+		pt.Describe("Failed to install service")
+		// error context already added by InstallService
+
+		// this is now in a bad state, because the service is uninstalled and now the service failed to install
+		return err
+	}
+	pt.Describe("Installed service")
+
+	// start the service
+	pt.Describe("Starting service")
+	err = StartService(topPath)
+	if err != nil {
+		pt.Describe("Failed to start service")
+		// error context already added by StartService
+
+		// this is now in a bad state, because the service is installed but failed to start
+		return err
+	}
+
+	// **end critical section**
+	// service is now re-created and started
+
+	return nil
+}
diff --git a/internal/pkg/agent/install/switch_darwin.go b/internal/pkg/agent/install/switch_darwin.go
new file mode 100644
index 00000000000..3eb566cc5e6
--- /dev/null
+++ b/internal/pkg/agent/install/switch_darwin.go
@@ -0,0 +1,102 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+ +//go:build darwin + +package install + +import ( + "context" + "fmt" + "os" + + "github.com/schollz/progressbar/v3" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/secret" + "github.com/elastic/elastic-agent/internal/pkg/agent/vault" + "github.com/elastic/elastic-agent/pkg/utils" +) + +func switchPlatformMode(pt *progressbar.ProgressBar, ownership utils.FileOwner) error { + ctx := context.Background() + + unprivilegedVault, err := checkForUnprivilegedVault(ctx) + if err != nil { + return fmt.Errorf("error checking for unprivileged vault: %w", err) + } + if unprivilegedVault { + if ownership.UID != 0 { + // already has unprivileged vault and going into unprivileged mode (nothing to do) + return nil + } + + pt.Describe("Migrating the vault") + + // get the agent secret from the file vault + var fileVaultOpts vault.Options + vault.WithReadonly(true)(&fileVaultOpts) + vault.WithVaultPath(paths.AgentVaultPath())(&fileVaultOpts) + fileVault, err := vault.NewFileVault(ctx, fileVaultOpts) + if err != nil { + return fmt.Errorf("failed to open file vault: %w", err) + } + agentKey, err := fileVault.Get(ctx, secret.AgentSecretKey) + if err != nil { + return fmt.Errorf("failed to get agent secret from file vault: %w", err) + } + + // set the agent secret into the keychain vault + keychainVault, err := vault.NewDarwinKeyChainVault(ctx, vault.Options{}) + if err != nil { + return fmt.Errorf("failed to open keychain vault: %w", err) + } + err = keychainVault.Set(ctx, secret.AgentSecretKey, agentKey) + if err != nil { + return fmt.Errorf("failed to set agent secret into keychain vault: %w", err) + } + + // remove the file-based vault path + err = os.Remove(paths.AgentVaultPath()) + if err != nil { + return fmt.Errorf("failed to delete file vault: %w", err) + } + + return nil + } + if ownership.UID == 0 { + // already has privileged vault and going into privileged mode (nothing to do) + 
return nil + } + + pt.Describe("Migrating the vault") + + // get the agent secret from the keychain vault + var keychainVaultOpts vault.Options + vault.WithReadonly(true)(&keychainVaultOpts) + keychainVault, err := vault.NewDarwinKeyChainVault(ctx, keychainVaultOpts) + if err != nil { + return fmt.Errorf("failed to open keychain vault: %w", err) + } + agentKey, err := keychainVault.Get(ctx, secret.AgentSecretKey) + if err != nil { + return fmt.Errorf("failed to get agent secret from keychain vault: %w", err) + } + + // set the agent secret into the file vault + var fileVaultOpts vault.Options + vault.WithVaultPath(paths.AgentVaultPath())(&fileVaultOpts) + fileVault, err := vault.NewFileVault(ctx, fileVaultOpts) + if err != nil { + return fmt.Errorf("failed to open file vault: %w", err) + } + err = fileVault.Set(ctx, secret.AgentSecretKey, agentKey) + if err != nil { + return fmt.Errorf("failed to set agent secret into file vault: %w", err) + } + + // no need to set the permissions, that will be set in the next step of the switch operation + + return nil +} diff --git a/internal/pkg/agent/install/switch_other.go b/internal/pkg/agent/install/switch_other.go new file mode 100644 index 00000000000..79c9b1a01ca --- /dev/null +++ b/internal/pkg/agent/install/switch_other.go @@ -0,0 +1,17 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build !darwin + +package install + +import ( + "github.com/schollz/progressbar/v3" + + "github.com/elastic/elastic-agent/pkg/utils" +) + +func switchPlatformMode(pt *progressbar.ProgressBar, ownership utils.FileOwner) error { + return nil +} diff --git a/internal/pkg/agent/install/uninstall.go b/internal/pkg/agent/install/uninstall.go index f333e8c49a5..1f64a14011f 100644 --- a/internal/pkg/agent/install/uninstall.go +++ b/internal/pkg/agent/install/uninstall.go @@ -45,39 +45,14 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr if runtime.GOOS == "windows" && paths.HasPrefix(cwd, topPath) { return fmt.Errorf("uninstall must be run from outside the installed path '%s'", topPath) } - // uninstall the current service - // not creating the service, so no need to set the username and group to any value - svc, err := newService(topPath) - if err != nil { - return fmt.Errorf("error creating new service handler: %w", err) - } - status, _ := svc.Status() - pt.Describe("Stopping service") - if status == service.StatusRunning { - err := svc.Stop() - if err != nil { - pt.Describe("Failed to issue stop service") - return aerrors.New( - err, - fmt.Sprintf("failed to issue stop service (%s)", paths.ServiceName), - aerrors.M("service", paths.ServiceName)) - } - } - // The kardianos service manager can't tell the difference - // between 'Stopped' and 'StopPending' on Windows, so make - // sure the service is stopped. 
- err = isStopped(30*time.Second, 250*time.Millisecond, paths.ServiceName) + // ensure service is stopped + status, err := EnsureStoppedService(topPath, pt) if err != nil { - pt.Describe("Failed to complete stop of service") - return aerrors.New( - err, - fmt.Sprintf("failed to complete stop service (%s)", paths.ServiceName), - aerrors.M("service", paths.ServiceName)) + // context for the error already provided in the EnsureStoppedService function + return err } - pt.Describe("Successfully stopped service") - // kill any running watcher if err := killWatcher(pt); err != nil { return fmt.Errorf("failed trying to kill any running watcher: %w", err) @@ -96,11 +71,9 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr // If service status was running it was stopped to uninstall the components. // If the components uninstall failed start the service again if status == service.StatusRunning { - if startErr := svc.Start(); startErr != nil { - return aerrors.New( - err, - fmt.Sprintf("failed to restart service (%s), after failed components uninstall: %v", paths.ServiceName, startErr), - aerrors.M("service", paths.ServiceName)) + if startErr := StartService(topPath); startErr != nil { + // context for the error already provided in the StartService function + return err } } return fmt.Errorf("error uninstalling components: %w", err) @@ -108,7 +81,7 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr // Uninstall service only after components were uninstalled successfully pt.Describe("Removing service") - err = svc.Uninstall() + err = UninstallService(topPath) // Is there a reason why we don't want to hard-fail on this? if err != nil { pt.Describe(fmt.Sprintf("Failed to Uninstall existing service: %s", err)) @@ -142,6 +115,24 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr return nil } +// EnsureStoppedService ensures that the installed service is stopped. 
+func EnsureStoppedService(topPath string, pt *progressbar.ProgressBar) (service.Status, error) { + status, _ := StatusService(topPath) + if status == service.StatusRunning { + pt.Describe("Stopping service") + err := StopService(topPath, 30*time.Second, 250*time.Millisecond) + if err != nil { + pt.Describe("Failed to issue stop service") + // context for the error already provided in the StopService function + return status, err + } + pt.Describe("Successfully stopped service") + } else { + pt.Describe("Service already stopped") + } + return status, nil +} + func checkForUnprivilegedVault(ctx context.Context, opts ...vault.OptionFunc) (bool, error) { // check if we have a file vault to detect if we have to use it for reading config opts = append(opts, vault.WithReadonly(true)) diff --git a/internal/pkg/agent/perms/windows.go b/internal/pkg/agent/perms/windows.go index fbd2e26333d..6a2fb208f5e 100644 --- a/internal/pkg/agent/perms/windows.go +++ b/internal/pkg/agent/perms/windows.go @@ -41,7 +41,7 @@ func FixPermissions(topPath string, opts ...OptFunc) error { grants = append(grants, acl.GrantSid(0xF10F0000, administratorsSID)) // full control of all acl's // user gets grant based on the mask - var userSID *windows.SID + userSID := administratorsSID // defaults to owned by Administrators if o.mask&0700 != 0 && o.ownership.UID != "" { userSID, err = windows.StringToSid(o.ownership.UID) if err != nil { @@ -51,7 +51,7 @@ func FixPermissions(topPath string, opts ...OptFunc) error { } // group gets grant based on the mask - var groupSID *windows.SID + groupSID := administratorsSID // defaults to owned by Administrators if o.mask&0070 != 0 && o.ownership.GID != "" { groupSID, err = windows.StringToSid(o.ownership.GID) if err != nil { diff --git a/pkg/component/component.go b/pkg/component/component.go index 954fc373a13..1f3579f1ce5 100644 --- a/pkg/component/component.go +++ b/pkg/component/component.go @@ -21,7 +21,6 @@ import ( 
"github.com/elastic/elastic-agent/internal/pkg/eql" "github.com/elastic/elastic-agent/pkg/features" "github.com/elastic/elastic-agent/pkg/limits" - "github.com/elastic/elastic-agent/pkg/utils" ) // GenerateMonitoringCfgFn is a function that can inject information into the model generation process. @@ -946,10 +945,6 @@ type outputI struct { // input specification runtime checks. This function should always be // edited in sync with the documentation in specs/README.md. func varsForPlatform(platform PlatformDetail) (*transpiler.Vars, error) { - hasRoot, err := utils.HasRoot() - if err != nil { - return nil, err - } return transpiler.NewVars("", map[string]interface{}{ "install": map[string]interface{}{ "in_default": paths.ArePathsEqual(paths.Top(), paths.InstallPath(paths.DefaultBasePath)) || pkgmgr.InstalledViaExternalPkgMgr(), @@ -964,7 +959,7 @@ func varsForPlatform(platform PlatformDetail) (*transpiler.Vars, error) { "minor": platform.Minor, }, "user": map[string]interface{}{ - "root": hasRoot, + "root": platform.User.Root, }, }, nil) } diff --git a/pkg/component/component_test.go b/pkg/component/component_test.go index 562ecc1450d..f13a2767b4b 100644 --- a/pkg/component/component_test.go +++ b/pkg/component/component_test.go @@ -545,8 +545,12 @@ func TestToComponents(t *testing.T) { InputType: "endpoint", OutputType: "elasticsearch", ID: "endpoint-default", - InputSpec: &InputRuntimeSpec{}, - Err: NewErrInputRuntimeCheckFail("Elastic Defend doesn't support RHEL7 on arm64"), + InputSpec: &InputRuntimeSpec{ + InputType: "endpoint", + BinaryName: "endpoint-security", + BinaryPath: filepath.Join("..", "..", "specs", "endpoint-security"), + }, + Err: NewErrInputRuntimeCheckFail("Elastic Defend doesn't support RHEL7 on arm64"), Units: []Unit{ { ID: "endpoint-default", diff --git a/pkg/component/load.go b/pkg/component/load.go index 39343842dc5..46fe2e4c59f 100644 --- a/pkg/component/load.go +++ b/pkg/component/load.go @@ -289,10 +289,9 @@ func (r *RuntimeSpecs) 
GetInput(inputType string) (InputRuntimeSpec, error) { return InputRuntimeSpec{}, ErrInputNotSupportedOnPlatform } err := validateRuntimeChecks(&runtimeSpec.Spec.Runtime, r.platform) - if err != nil { - return InputRuntimeSpec{}, err - } - return runtimeSpec, nil + // runtimeSpec is always returned so the caller knows which runtime would have been used + // even if the runtime checks return an error + return runtimeSpec, err } // ShippersForOutputType returns the shippers that support the outputType. diff --git a/pkg/component/platforms.go b/pkg/component/platforms.go index d8d69c17877..66b9d6f0e16 100644 --- a/pkg/component/platforms.go +++ b/pkg/component/platforms.go @@ -10,6 +10,8 @@ import ( "strings" "github.com/elastic/go-sysinfo" + + "github.com/elastic/elastic-agent/pkg/utils" ) const ( @@ -98,6 +100,11 @@ func (p Platforms) Exists(platform string) bool { return false } +// UserDetail provides user specific information on the running platform. +type UserDetail struct { + Root bool +} + // PlatformDetail is platform that has more detail information about the running platform. type PlatformDetail struct { Platform @@ -106,6 +113,8 @@ type PlatformDetail struct { Family string Major int Minor int + + User UserDetail } // PlatformModifier can modify the platform details before the runtime specifications are loaded. @@ -113,6 +122,10 @@ type PlatformModifier func(detail PlatformDetail) PlatformDetail // LoadPlatformDetail loads the platform details for the current system.
func LoadPlatformDetail(modifiers ...PlatformModifier) (PlatformDetail, error) { + hasRoot, err := utils.HasRoot() + if err != nil { + return PlatformDetail{}, err + } info, err := sysinfo.Host() if err != nil { return PlatformDetail{}, err @@ -139,6 +152,9 @@ func LoadPlatformDetail(modifiers ...PlatformModifier) (PlatformDetail, error) { Family: os.Family, Major: os.Major, Minor: os.Minor, + User: UserDetail{ + Root: hasRoot, + }, } for _, modifier := range modifiers { detail = modifier(detail) diff --git a/pkg/control/v2/client/wait/agent.go b/pkg/control/v2/client/wait/agent.go new file mode 100644 index 00000000000..6073bc38fdc --- /dev/null +++ b/pkg/control/v2/client/wait/agent.go @@ -0,0 +1,60 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package wait + +import ( + "context" + "errors" + "time" +) + +// ForAgent waits for the agent daemon to be able to be communicated with. 
+func ForAgent(ctx context.Context, timeout time.Duration) error { + if timeout == 0 { + timeout = 1 * time.Minute // default of 1 minute + } + if timeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } + maxBackoff := timeout + if maxBackoff <= 0 { + // indefinite timeout + maxBackoff = 10 * time.Minute + } + + resChan := make(chan waitResult) + innerCtx, innerCancel := context.WithCancel(context.Background()) + defer innerCancel() + go func() { + backOff := expBackoffWithContext(innerCtx, 1*time.Second, maxBackoff) + for { + backOff.Wait() + _, err := getDaemonState(innerCtx, DefaultDaemonTimeout) + if errors.Is(err, context.Canceled) { + resChan <- waitResult{err: err} + return + } + if err == nil { + resChan <- waitResult{} + break + } + } + }() + + var res waitResult + select { + case <-ctx.Done(): + innerCancel() + res = <-resChan + case res = <-resChan: + } + + if res.err != nil { + return res.err + } + return nil +} diff --git a/pkg/control/v2/client/wait/common.go b/pkg/control/v2/client/wait/common.go new file mode 100644 index 00000000000..56969d562fe --- /dev/null +++ b/pkg/control/v2/client/wait/common.go @@ -0,0 +1,44 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package wait + +import ( + "context" + "time" + + "github.com/elastic/elastic-agent/internal/pkg/core/backoff" + "github.com/elastic/elastic-agent/pkg/control/v2/client" +) + +const ( + // DefaultDaemonTimeout is the default timeout to use for waiting for the daemon. 
+ DefaultDaemonTimeout = 30 * time.Second // max amount of time for communication to the running Agent daemon +) + +type waitResult struct { + err error +} + +func getDaemonState(ctx context.Context, timeout time.Duration) (*client.AgentState, error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + daemon := client.New() + err := daemon.Connect(ctx) + if err != nil { + return nil, err + } + defer daemon.Disconnect() + return daemon.State(ctx) +} + +func expBackoffWithContext(ctx context.Context, init, max time.Duration) backoff.Backoff { + signal := make(chan struct{}) + bo := backoff.NewExpBackoff(signal, init, max) + go func() { + <-ctx.Done() + close(signal) + }() + return bo +} diff --git a/testing/integration/endpoint_security_test.go b/testing/integration/endpoint_security_test.go index 15342b3e20f..ccd79a90aec 100644 --- a/testing/integration/endpoint_security_test.go +++ b/testing/integration/endpoint_security_test.go @@ -649,6 +649,76 @@ func TestEndpointSecurityUnprivileged(t *testing.T) { }, 2*time.Minute, 10*time.Second, "Agent never became DEGRADED with root/Administrator install message") } +// Tests that trying to switch from privileged to unprivileged with Elastic Defend fails. +func TestEndpointSecurityCannotSwitchToUnprivileged(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + + // Only supports Linux at the moment. + OS: []define.OS{ + { + Type: define.Linux, + }, + }, + }) + + ctx, cn := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cn() + + // Get path to agent executable.
+ fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + t.Log("Enrolling the agent in Fleet") + policyUUID := uuid.New().String() + createPolicyReq := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, // ensure always privileged + } + policyResp, err := tools.InstallAgentWithPolicy(ctx, t, installOpts, fixture, info.KibanaClient, createPolicyReq) + require.NoErrorf(t, err, "Policy Response was: %v", policyResp) + + t.Log("Installing Elastic Defend") + pkgPolicyResp, err := installElasticDefendPackage(t, info, policyResp.ID) + require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp) + + t.Log("Polling for endpoint-security to become Healthy") + healthyCtx, cancel := context.WithTimeout(ctx, endpointHealthPollingTimeout) + defer cancel() + + agentClient := fixture.Client() + err = agentClient.Connect(healthyCtx) + require.NoError(t, err) + + require.Eventually(t, + func() bool { return agentAndEndpointAreHealthy(t, healthyCtx, agentClient) }, + endpointHealthPollingTimeout, + time.Second, + "Endpoint component or units are not healthy.", + ) + t.Log("Verified endpoint component and units are healthy") + + performSwitchCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) + defer cancel() + output, err := fixture.Exec(performSwitchCtx, []string{"unprivileged", "-f"}) + require.Errorf(t, err, "unprivileged command should have failed") + assert.Contains(t, string(output), "unable to switch to unprivileged mode due to the following service based components having issues") + assert.Contains(t, string(output), "endpoint") +} + // TestEndpointLogsAreCollectedInDiagnostics tests that diagnostics archive contain endpoint 
logs func TestEndpointLogsAreCollectedInDiagnostics(t *testing.T) { info := define.Require(t, define.Requirements{ diff --git a/testing/integration/switch_privileged_test.go b/testing/integration/switch_privileged_test.go new file mode 100644 index 00000000000..9a5acb73965 --- /dev/null +++ b/testing/integration/switch_privileged_test.go @@ -0,0 +1,139 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package integration + +import ( + "context" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/elastic-agent/testing/installtest" + + "github.com/stretchr/testify/require" +) + +func TestSwitchPrivilegedWithoutBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install`. + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. 
+ opts := &atesting.InstallOpts{Force: true, Privileged: false} + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in default base path in unprivileged mode + require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, true)) + + // Switch to privileged mode + out, err = fixture.Exec(ctx, []string{"privileged", "-f"}) + if err != nil { + t.Logf("privileged output: %s", out) + require.NoError(t, err) + } + + // Check that Agent is running in default base path in privileged mode + require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, false)) +} + +func TestSwitchPrivilegedWithBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install`. + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // When running in unprivileged using a base path the + // base needs to be accessible by the `elastic-agent-user` user that will be + // executing the process, but is not created yet. Using a base that exists + // and is known to be accessible by standard users, ensures this test + // works correctly and will not hit a permission issue when spawning the + // elastic-agent service.
+ var basePath string + switch runtime.GOOS { + case define.Linux: + basePath = `/usr` + case define.Windows: + basePath = `C:\` + default: + // Set up random temporary directory to serve as base path for Elastic Agent + // installation. + tmpDir := t.TempDir() + basePath = filepath.Join(tmpDir, strings.ToLower(randStr(8))) + } + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := &atesting.InstallOpts{ + BasePath: basePath, + Force: true, + Privileged: false, + } + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in the custom base path in unprivileged mode + topPath := filepath.Join(basePath, "Elastic", "Agent") + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, true)) + + // Switch to privileged mode + out, err = fixture.Exec(ctx, []string{"privileged", "-f"}) + if err != nil { + t.Logf("privileged output: %s", out) + require.NoError(t, err) + } + + // Check that Agent is running in the custom base path in privileged mode + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, false)) +} diff --git a/testing/integration/switch_unprivileged_test.go b/testing/integration/switch_unprivileged_test.go new file mode 100644 index 00000000000..9429a59613b --- /dev/null +++ b/testing/integration/switch_unprivileged_test.go @@ -0,0 +1,139 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build integration + +package integration + +import ( + "context" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/elastic-agent/testing/installtest" + + "github.com/stretchr/testify/require" +) + +func TestSwitchUnprivilegedWithoutBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install`. + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. 
+ opts := &atesting.InstallOpts{Force: true, Privileged: true} + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in default base path in privileged mode + require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, false)) + + // Switch to unprivileged mode + out, err = fixture.Exec(ctx, []string{"unprivileged", "-f"}) + if err != nil { + t.Logf("unprivileged output: %s", out) + require.NoError(t, err) + } + + // Check that Agent is running in default base path in unprivileged mode + require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, true)) +} + +func TestSwitchUnprivilegedWithBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install`. + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // When running in unprivileged using a base path the + // base needs to be accessible by the `elastic-agent-user` user that will be + // executing the process, but is not created yet. Using a base that exists + // and is known to be accessible by standard users, ensures this test + // works correctly and will not hit a permission issue when spawning the + // elastic-agent service.
+ var basePath string + switch runtime.GOOS { + case define.Linux: + basePath = `/usr` + case define.Windows: + basePath = `C:\` + default: + // Set up random temporary directory to serve as base path for Elastic Agent + // installation. + tmpDir := t.TempDir() + basePath = filepath.Join(tmpDir, strings.ToLower(randStr(8))) + } + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := &atesting.InstallOpts{ + BasePath: basePath, + Force: true, + Privileged: true, + } + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in the custom base path in privileged mode + topPath := filepath.Join(basePath, "Elastic", "Agent") + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, false)) + + // Switch to unprivileged mode + out, err = fixture.Exec(ctx, []string{"unprivileged", "-f"}) + if err != nil { + t.Logf("unprivileged output: %s", out) + require.NoError(t, err) + } + + // Check that Agent is running in the custom base path in unprivileged mode + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, true)) +} diff --git a/testing/integration/upgrade_rollback_test.go b/testing/integration/upgrade_rollback_test.go index c079a51f3e8..b27f74d0b22 100644 --- a/testing/integration/upgrade_rollback_test.go +++ b/testing/integration/upgrade_rollback_test.go @@ -205,7 +205,7 @@ func TestStandaloneUpgradeRollbackOnRestarts(t *testing.T) { topPath := paths.Top() t.Logf("Stopping agent via service to simulate crashing") - err = install.StopService(topPath) + err = install.StopService(topPath, install.DefaultStopTimeout, install.DefaultStopInterval) if err != nil && runtime.GOOS == define.Windows && strings.Contains(err.Error(), "The service has not been started.") { // Due to the quick restarts every 10 seconds its possible that this is faster than Windows // can handle. 
Decrementing restartIdx means that the loop will occur again.