From 659ba41767d92fe1bc2a10ecfc4c729ee9e9ba60 Mon Sep 17 00:00:00 2001 From: Dakota Paasman <122491662+dpaasman00@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:13:21 -0400 Subject: [PATCH] [opampsupervisor] Add HealthCheckPort configuration parameter (#34704) **Description:** Add a new configuration parameter to `agent` called `health_check_port`. If this is set, then the supervisor will configure the agent's healthcheck extension to use the given port. If it is unset, then we will grab a random port same as before. **Link to tracking Issue:** #34643 **Testing:** - Updated config validation tests - Verified that healthcheck extension is configured with the correct port and works as expected --- ...ervisor-healthcheck-port-configurable.yaml | 27 +++++++ cmd/opampsupervisor/e2e_test.go | 9 ++- .../supervisor/config/config.go | 5 ++ .../supervisor/config/config_test.go | 76 +++++++++++++++++++ cmd/opampsupervisor/supervisor/supervisor.go | 9 ++- .../supervisor_healthcheck_port.yaml | 19 +++++ 6 files changed, 138 insertions(+), 7 deletions(-) create mode 100644 .chloggen/supervisor-healthcheck-port-configurable.yaml create mode 100644 cmd/opampsupervisor/testdata/supervisor/supervisor_healthcheck_port.yaml diff --git a/.chloggen/supervisor-healthcheck-port-configurable.yaml b/.chloggen/supervisor-healthcheck-port-configurable.yaml new file mode 100644 index 000000000000..e0e137835d0a --- /dev/null +++ b/.chloggen/supervisor-healthcheck-port-configurable.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: opampsupervisor + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Add new config parameter `agent.health_check_port` to allow configuring the port used by the agent healthcheck extension." + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [34643] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/cmd/opampsupervisor/e2e_test.go b/cmd/opampsupervisor/e2e_test.go index 22a1bcb755bd..9316e8ffde8a 100644 --- a/cmd/opampsupervisor/e2e_test.go +++ b/cmd/opampsupervisor/e2e_test.go @@ -337,8 +337,9 @@ func TestSupervisorStartsWithNoOpAMPServer(t *testing.T) { // The supervisor is started without a running OpAMP server. // The supervisor should start successfully, even if the OpAMP server is stopped. - s := newSupervisor(t, "basic", map[string]string{ - "url": server.addr, + s := newSupervisor(t, "healthcheck_port", map[string]string{ + "url": server.addr, + "healthcheck_port": "12345", }) require.Nil(t, s.Start()) @@ -346,9 +347,9 @@ func TestSupervisorStartsWithNoOpAMPServer(t *testing.T) { // Verify the collector is running by checking the metrics endpoint require.Eventually(t, func() bool { - resp, err := http.DefaultClient.Get("http://localhost:8888/metrics") + resp, err := http.DefaultClient.Get("http://localhost:12345") if err != nil { - t.Logf("Failed check for prometheus metrics: %s", err) + t.Logf("Failed agent healthcheck request: %s", err) return false } require.NoError(t, resp.Body.Close()) diff --git a/cmd/opampsupervisor/supervisor/config/config.go b/cmd/opampsupervisor/supervisor/config/config.go index 60244e9d9c9e..7e8d2124c356 100644 --- a/cmd/opampsupervisor/supervisor/config/config.go +++ b/cmd/opampsupervisor/supervisor/config/config.go @@ -121,6 +121,7 @@ type Agent struct { Executable string OrphanDetectionInterval time.Duration `mapstructure:"orphan_detection_interval"` Description AgentDescription `mapstructure:"description"` + HealthCheckPort int `mapstructure:"health_check_port"` } func (a Agent) Validate() error { @@ -128,6 +129,10 @@ func (a Agent) Validate() error { return errors.New("agent::orphan_detection_interval must be positive") } + if a.HealthCheckPort < 0 || a.HealthCheckPort > 65535 { + return errors.New("agent::health_check_port must be a valid port number") + } + if a.Executable == "" { return errors.New("agent::executable must be specified") } diff --git a/cmd/opampsupervisor/supervisor/config/config_test.go b/cmd/opampsupervisor/supervisor/config/config_test.go index afc3e9c0f462..776523ab0646 100644 --- a/cmd/opampsupervisor/supervisor/config/config_test.go +++ b/cmd/opampsupervisor/supervisor/config/config_test.go @@ -223,6 +223,82 @@ func TestValidate(t *testing.T) { }, expectedError: "agent::orphan_detection_interval must be positive", }, + { + name: "Invalid port number", + config: Supervisor{ + Server: OpAMPServer{ + Endpoint: "wss://localhost:9090/opamp", + Headers: http.Header{ + "Header1": []string{"HeaderValue"}, + }, + TLSSetting: configtls.ClientConfig{ + Insecure: true, + }, + }, + Agent: Agent{ + Executable: "${file_path}", + OrphanDetectionInterval: 5 * time.Second, + HealthCheckPort: 65536, + }, + Capabilities: Capabilities{ + AcceptsRemoteConfig: true, + }, + Storage: Storage{ + Directory: "/etc/opamp-supervisor/storage", + }, + }, + expectedError: "agent::health_check_port must be a valid port number", + }, + { + name: "Zero value port number", + config: Supervisor{ + Server: OpAMPServer{ + Endpoint: "wss://localhost:9090/opamp", + Headers: http.Header{ + "Header1": []string{"HeaderValue"}, + }, + TLSSetting: configtls.ClientConfig{ + Insecure: true, + }, + }, + Agent: Agent{ + Executable: "${file_path}", + OrphanDetectionInterval: 5 * time.Second, + HealthCheckPort: 0, + }, + Capabilities: Capabilities{ + AcceptsRemoteConfig: true, + }, + Storage: Storage{ + Directory: "/etc/opamp-supervisor/storage", + }, + }, + }, + { + name: "Normal port number", + config: Supervisor{ + Server: OpAMPServer{ + Endpoint: "wss://localhost:9090/opamp", + Headers: http.Header{ + "Header1": []string{"HeaderValue"}, + }, + TLSSetting: configtls.ClientConfig{ + Insecure: true, + }, + }, + Agent: Agent{ + Executable: "${file_path}", + OrphanDetectionInterval: 5 * time.Second, + HealthCheckPort: 29848, + }, + Capabilities: Capabilities{ + AcceptsRemoteConfig: true, + }, + Storage: Storage{ + Directory: "/etc/opamp-supervisor/storage", + }, + }, + }, } // create some fake files for validating agent config diff --git a/cmd/opampsupervisor/supervisor/supervisor.go b/cmd/opampsupervisor/supervisor/supervisor.go index 804face7d1ae..2521a413825c 100644 --- a/cmd/opampsupervisor/supervisor/supervisor.go +++ b/cmd/opampsupervisor/supervisor/supervisor.go @@ -179,10 +179,13 @@ func (s *Supervisor) Start() error { return fmt.Errorf("could not get bootstrap info from the Collector: %w", err) } - healthCheckPort, err := s.findRandomPort() + healthCheckPort := s.config.Agent.HealthCheckPort + if healthCheckPort == 0 { + healthCheckPort, err = s.findRandomPort() - if err != nil { - return fmt.Errorf("could not find port for health check: %w", err) + if err != nil { + return fmt.Errorf("could not find port for health check: %w", err) + } } s.agentHealthCheckEndpoint = fmt.Sprintf("localhost:%d", healthCheckPort) diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_healthcheck_port.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_healthcheck_port.yaml new file mode 100644 index 000000000000..10ba3976615e --- /dev/null +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_healthcheck_port.yaml @@ -0,0 +1,19 @@ +server: + endpoint: ws://{{.url}}/v1/opamp + tls: + insecure: true + +capabilities: + reports_effective_config: true + reports_own_metrics: true + reports_health: true + accepts_remote_config: true + reports_remote_config: true + accepts_restart_command: true + +storage: + directory: "{{.storage_dir}}" + +agent: + executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} + health_check_port: "{{ .healthcheck_port }}"