diff --git a/changelog/fragments/1730284359-assume-installed-agent-on-upgrade.yaml b/changelog/fragments/1730284359-assume-installed-agent-on-upgrade.yaml new file mode 100644 index 00000000000..25a6db9a27a --- /dev/null +++ b/changelog/fragments/1730284359-assume-installed-agent-on-upgrade.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: This PR assumes installed agent on upgrade in order to initialize control path properly. + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/owner/repo/5879 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: https://github.com/owner/repo/5872 diff --git a/internal/pkg/agent/application/paths/paths_test.go b/internal/pkg/agent/application/paths/paths_test.go index 62c49f65ef3..08efd7e802c 100644 --- a/internal/pkg/agent/application/paths/paths_test.go +++ b/internal/pkg/agent/application/paths/paths_test.go @@ -5,10 +5,12 @@ package paths import ( + "fmt" "runtime" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestEqual(t *testing.T) { @@ -92,3 +94,58 @@ func TestHasPrefixWindows(t *testing.T) { }) } } + +func TestResolveControlSocket(t *testing.T) { + testCases := []struct { + os string + controlSocketSame bool + runningInstalled bool + controlSocketPathChangeTo string // in case of a change + }{ + {"darwin", false, false, ""}, + {"darwin", true, false, ""}, + {"darwin", false, true, ""}, + {"darwin", true, true, ""}, + + {"linux", false, false, ""}, + {"linux", true, false, ""}, + {"linux", false, true, ""}, + {"linux", true, true, ""}, + + {"windows", false, false, ""}, + {"windows", true, false, ""}, + {"windows", false, true, ""}, + {"windows", true, true, WindowsControlSocketInstalledPath}, + } + + for i, tc := range testCases { + if runtime.GOOS != tc.os { + continue + } + t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { + prevControlSocketPath := controlSocketPath + prevTopPath := topPath + defer func() { + // just to be sure + controlSocketPath = prevControlSocketPath + topPath = prevTopPath + }() + + topPath = "/top" + controlSocketPath = ControlSocketFromPath(tc.os, "/top") + if !tc.controlSocketSame { + controlSocketPath = "/custom/socket/path" + } + + expecteSocketPath := controlSocketPath + if tc.controlSocketPathChangeTo != "" { + expecteSocketPath = tc.controlSocketPathChangeTo + } + + ResolveControlSocket(tc.runningInstalled) + + require.Equal(t, expecteSocketPath, controlSocketPath) + }) + + } +} diff --git a/internal/pkg/agent/application/paths/paths_unix.go b/internal/pkg/agent/application/paths/paths_unix.go index a667d725ffa..08cfdd4ab9a 100644 --- a/internal/pkg/agent/application/paths/paths_unix.go +++ b/internal/pkg/agent/application/paths/paths_unix.go @@ -33,7 +33,7 @@ func initialControlSocketPath(topPath string) string { } // ResolveControlSocket does nothing on non-Windows hosts. -func ResolveControlSocket() {} +func ResolveControlSocket(_ bool) {} // HasPrefix tests if the path starts with the prefix. func HasPrefix(path string, prefix string) bool { diff --git a/internal/pkg/agent/application/paths/paths_windows.go b/internal/pkg/agent/application/paths/paths_windows.go index c252f311dba..8d5c78d47a0 100644 --- a/internal/pkg/agent/application/paths/paths_windows.go +++ b/internal/pkg/agent/application/paths/paths_windows.go @@ -64,9 +64,9 @@ func initialControlSocketPath(topPath string) string { // Called during the upgrade process from pre-8.8 versions. In pre-8.8 versions the // RunningInstalled will always be false, even when it is an installed version. Once // that is fixed from the upgrade process the control socket path needs to be updated. -func ResolveControlSocket() { +func ResolveControlSocket(runningInstalled bool) { currentPath := ControlSocket() - if currentPath == ControlSocketFromPath(runtime.GOOS, topPath) && RunningInstalled() { + if currentPath == ControlSocketFromPath(runtime.GOOS, topPath) && runningInstalled { // path is not correct being that it's installed // reset the control socket path to be the installed path SetControlSocket(WindowsControlSocketInstalledPath) diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index 38089000d47..df7ee03df70 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -12,6 +12,7 @@ import ( "google.golang.org/grpc" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/core/logger" ) @@ -50,6 +51,11 @@ type AgentWatcher struct { // NewAgentWatcher creates a new agent watcher. func NewAgentWatcher(ch chan error, log *logger.Logger, checkInterval time.Duration) *AgentWatcher { + // when starting watcher from pre 8.8 version of agent control socket is evaluated incorrectly and upgrade fails. + // resolving control socket updates it to a proper value before client is initiated + // upgrade is only available for installed agent so we can assume + paths.ResolveControlSocket(true) + c := client.New() ec := &AgentWatcher{ notifyChan: ch, diff --git a/internal/pkg/agent/cmd/run.go b/internal/pkg/agent/cmd/run.go index d0d231267f9..7cdf1e9db66 100644 --- a/internal/pkg/agent/cmd/run.go +++ b/internal/pkg/agent/cmd/run.go @@ -732,7 +732,8 @@ func ensureInstallMarkerPresent() error { // in the `paths.ControlSocket()` in returning the incorrect control socket (only on Windows). // Now that the install marker has been created we need to ensure that `paths.ControlSocket()` will // return the correct result. - paths.ResolveControlSocket() + // We are being upgraded, we're running as installed, marker was just created. + paths.ResolveControlSocket(true) return nil } diff --git a/testing/upgradetest/watcher.go b/testing/upgradetest/watcher.go index a2563400fd3..c5684382b3f 100644 --- a/testing/upgradetest/watcher.go +++ b/testing/upgradetest/watcher.go @@ -15,9 +15,10 @@ import ( ) // FastWatcherCfg is configuration that makes the watcher run faster. +// we need to set grace period to 100s to be able to detect 5 failures 15 seconds apart and have a little buffer. const FastWatcherCfg = ` agent.upgrade.watcher: - grace_period: 1m + grace_period: 100s error_check.interval: 15s crash_check.interval: 15s `