From 3318bf0a3bdea893352d3cca8e3b4f983af26b6b Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Tue, 18 Oct 2022 20:28:23 -0400 Subject: [PATCH 1/8] Added cleanup if service errors on Start Signed-off-by: Corbin Phelps --- .chloggen/service-cleanup-on-start-error.yaml | 16 ++++++++++++++++ service/collector.go | 8 +++----- service/service.go | 4 ++++ 3 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 .chloggen/service-cleanup-on-start-error.yaml diff --git a/.chloggen/service-cleanup-on-start-error.yaml b/.chloggen/service-cleanup-on-start-error.yaml new file mode 100644 index 00000000000..f0454baaf59 --- /dev/null +++ b/.chloggen/service-cleanup-on-start-error.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: collector + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fixed collector service not cleaning up if it failed during Start + +# One or more tracking issues or pull requests related to the change +issues: [] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: diff --git a/service/collector.go b/service/collector.go index 1976e70a4b6..db8d14ac985 100644 --- a/service/collector.go +++ b/service/collector.go @@ -153,6 +153,9 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error { } if err = col.service.Start(ctx); err != nil { + if shutdownErr := col.service.Shutdown(ctx); shutdownErr != nil { + return multierr.Append(err, fmt.Errorf("failed to shutdown service after error: %w", shutdownErr)) + } return err } col.setCollectorState(Running) @@ -223,11 +226,6 @@ func (col *Collector) shutdown(ctx context.Context) error { errs = multierr.Append(errs, fmt.Errorf("failed to shutdown service: %w", err)) } - // TODO: Move this as part of the service shutdown. - if err := col.service.telemetryInitializer.shutdown(); err != nil { - errs = multierr.Append(errs, fmt.Errorf("failed to shutdown collector telemetry: %w", err)) - } - col.setCollectorState(Closed) return errs diff --git a/service/service.go b/service/service.go index eca81b427cc..d9b9180fce8 100644 --- a/service/service.go +++ b/service/service.go @@ -84,6 +84,7 @@ func newService(set *settings) (*service, error) { return srv, nil } +// Start starts the extensions and pipelines. If Start fails Shutdown should be called to ensure a clean state. func (srv *service) Start(ctx context.Context) error { srv.telemetrySettings.Logger.Info("Starting "+srv.buildInfo.Command+"...", zap.String("Version", srv.buildInfo.Version), @@ -131,6 +132,9 @@ func (srv *service) Shutdown(ctx context.Context) error { errs = multierr.Append(errs, fmt.Errorf("failed to shutdown telemetry: %w", err)) } + if err := srv.telemetryInitializer.shutdown(); err != nil { + errs = multierr.Append(errs, fmt.Errorf("failed to shutdown telemetry initializer: %w", err)) + } // TODO: Shutdown MeterProvider. return errs } From e187b697b064dc4051fa55b818b477f355504eda Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Tue, 18 Oct 2022 20:54:26 -0400 Subject: [PATCH 2/8] Added issue to changelog Signed-off-by: Corbin Phelps --- .chloggen/service-cleanup-on-start-error.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.chloggen/service-cleanup-on-start-error.yaml b/.chloggen/service-cleanup-on-start-error.yaml index f0454baaf59..8c0692a9b34 100644 --- a/.chloggen/service-cleanup-on-start-error.yaml +++ b/.chloggen/service-cleanup-on-start-error.yaml @@ -8,7 +8,7 @@ component: collector note: Fixed collector service not cleaning up if it failed during Start # One or more tracking issues or pull requests related to the change -issues: [] +issues: [6352] # (Optional) One or more lines of additional information to render under the primary note. # These lines will be padded with 2 spaces and then inserted directly into the document. From 5a0aeca35ba025a174458f986e55f8dfb807b0e7 Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Tue, 25 Oct 2022 11:48:12 -0400 Subject: [PATCH 3/8] Added a test to verify the reusablility of telemetryInitializer in services Signed-off-by: Corbin Phelps --- service/service_test.go | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/service/service_test.go b/service/service_test.go index a7b579018e8..449d31c471d 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -137,6 +137,56 @@ func TestServiceTelemetryCleanupOnError(t *testing.T) { } +// TestServiceTelemetryReusable tests that a single telemetryInitializer can be reused in multiple services +func TestServiceTelemetryReusable(t *testing.T) { + factories, err := componenttest.NopFactories() + require.NoError(t, err) + + // Read valid yaml config from file + validProvider, err := NewConfigProvider(newDefaultConfigProviderSettings([]string{filepath.Join("testdata", "otelcol-nop.yaml")})) + require.NoError(t, err) + validCfg, err := validProvider.Get(context.Background(), factories) + require.NoError(t, err) + + // Create a service + telemetry := newColTelemetry(featuregate.NewRegistry()) + srvOne, err := newService(&settings{ + BuildInfo: component.NewDefaultBuildInfo(), + Factories: factories, + Config: validCfg, + telemetry: telemetry, + }) + require.NoError(t, err) + + // Start the service + require.NoError(t, srvOne.Start(context.Background())) + + // Shutdown the service + require.NoError(t, srvOne.Shutdown(context.Background())) + + // Create a new service with the same telemetry + srvTwo, err := newService(&settings{ + BuildInfo: component.NewDefaultBuildInfo(), + Factories: factories, + Config: validCfg, + telemetry: telemetry, + }) + require.NoError(t, err) + + // For safety ensure everything is cleaned up + t.Cleanup(func() { + assert.NoError(t, telemetry.shutdown()) + assert.NoError(t, srvOne.Shutdown(context.Background())) + assert.NoError(t, srvTwo.Shutdown(context.Background())) + }) + + // Start the new service + require.NoError(t, srvTwo.Start(context.Background())) + + // Shutdown the new service + require.NoError(t, srvTwo.Shutdown(context.Background())) +} + func createExampleService(t *testing.T, factories component.Factories) *service { // Read yaml config from file prov, err := NewConfigProvider(newDefaultConfigProviderSettings([]string{filepath.Join("testdata", "otelcol-nop.yaml")})) From 768d8a3db5be8b475c07e590e3a2124cfcaee5cc Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Tue, 25 Oct 2022 14:56:13 -0400 Subject: [PATCH 4/8] Moved telemetryInitializer shutdown out of service shutdown Signed-off-by: Corbin Phelps --- service/collector.go | 15 +++++++++++++-- service/service.go | 3 --- service/service_test.go | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/service/collector.go b/service/collector.go index db8d14ac985..caf52c33f43 100644 --- a/service/collector.go +++ b/service/collector.go @@ -153,10 +153,16 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error { } if err = col.service.Start(ctx); err != nil { + errs := err if shutdownErr := col.service.Shutdown(ctx); shutdownErr != nil { - return multierr.Append(err, fmt.Errorf("failed to shutdown service after error: %w", shutdownErr)) + errs = multierr.Append(err, fmt.Errorf("failed to shutdown service after error: %w", shutdownErr)) } - return err + + // TODO: Move this as part of the service shutdown. + if shutdownErr := col.service.telemetryInitializer.shutdown(); shutdownErr != nil { + errs = multierr.Append(errs, fmt.Errorf("failed to shutdown collector telemetry: %w", shutdownErr)) + } + return errs } col.setCollectorState(Running) return nil @@ -226,6 +232,11 @@ func (col *Collector) shutdown(ctx context.Context) error { errs = multierr.Append(errs, fmt.Errorf("failed to shutdown service: %w", err)) } + // TODO: Move this as part of the service shutdown. + if err := col.service.telemetryInitializer.shutdown(); err != nil { + errs = multierr.Append(errs, fmt.Errorf("failed to shutdown collector telemetry: %w", err)) + } + col.setCollectorState(Closed) return errs diff --git a/service/service.go b/service/service.go index d9b9180fce8..77e56db5be8 100644 --- a/service/service.go +++ b/service/service.go @@ -132,9 +132,6 @@ func (srv *service) Shutdown(ctx context.Context) error { errs = multierr.Append(errs, fmt.Errorf("failed to shutdown telemetry: %w", err)) } - if err := srv.telemetryInitializer.shutdown(); err != nil { - errs = multierr.Append(errs, fmt.Errorf("failed to shutdown telemetry initializer: %w", err)) - } // TODO: Shutdown MeterProvider. return errs } diff --git a/service/service_test.go b/service/service_test.go index 449d31c471d..18f7df3ae36 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -16,6 +16,8 @@ package service import ( "context" + "fmt" + "net/http" "path/filepath" "testing" @@ -150,6 +152,7 @@ func TestServiceTelemetryReusable(t *testing.T) { // Create a service telemetry := newColTelemetry(featuregate.NewRegistry()) + srvOne, err := newService(&settings{ BuildInfo: component.NewDefaultBuildInfo(), Factories: factories, @@ -158,9 +161,20 @@ func TestServiceTelemetryReusable(t *testing.T) { }) require.NoError(t, err) + // Setup tu curl the telemetry URL to ensure it works + telemetryURL := fmt.Sprintf("http://%s/metrics", telemetry.server.Addr) + // Start the service require.NoError(t, srvOne.Start(context.Background())) + // check telemetry server to ensure we get a response + var resp *http.Response + + // #nosec G107 + resp, err = http.Get(telemetryURL) + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, resp.StatusCode) + // Shutdown the service require.NoError(t, srvOne.Shutdown(context.Background())) @@ -183,6 +197,12 @@ func TestServiceTelemetryReusable(t *testing.T) { // Start the new service require.NoError(t, srvTwo.Start(context.Background())) + // check telemetry server to ensure we get a response + // #nosec G107 + resp, err = http.Get(telemetryURL) + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, resp.StatusCode) + // Shutdown the new service require.NoError(t, srvTwo.Shutdown(context.Background())) } From 0e5db510049e1897a515b4eed32cc7ca72b50b5a Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Tue, 25 Oct 2022 15:06:26 -0400 Subject: [PATCH 5/8] Fixed comment Signed-off-by: Corbin Phelps --- service/service_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/service_test.go b/service/service_test.go index 18f7df3ae36..bcd5d2e7b8a 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -161,7 +161,7 @@ func TestServiceTelemetryReusable(t *testing.T) { }) require.NoError(t, err) - // Setup tu curl the telemetry URL to ensure it works + // URL of the telemetry service metrics endpoint telemetryURL := fmt.Sprintf("http://%s/metrics", telemetry.server.Addr) // Start the service From 8fbe44707622ea80824627bb71837efcff49431c Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Tue, 25 Oct 2022 15:48:27 -0400 Subject: [PATCH 6/8] Bundled service shutdown and telemetry into a helper function in the collector Signed-off-by: Corbin Phelps --- service/collector.go | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/service/collector.go b/service/collector.go index caf52c33f43..55b0df1f9a7 100644 --- a/service/collector.go +++ b/service/collector.go @@ -153,16 +153,9 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error { } if err = col.service.Start(ctx); err != nil { - errs := err - if shutdownErr := col.service.Shutdown(ctx); shutdownErr != nil { - errs = multierr.Append(err, fmt.Errorf("failed to shutdown service after error: %w", shutdownErr)) + if shutdownErr := col.shutdownServiceAndTelemetry(ctx); shutdownErr != nil { + return multierr.Append(err, shutdownErr) } - - // TODO: Move this as part of the service shutdown. - if shutdownErr := col.service.telemetryInitializer.shutdown(); shutdownErr != nil { - errs = multierr.Append(errs, fmt.Errorf("failed to shutdown collector telemetry: %w", shutdownErr)) - } - return errs } col.setCollectorState(Running) return nil @@ -228,17 +221,30 @@ func (col *Collector) shutdown(ctx context.Context) error { errs = multierr.Append(errs, fmt.Errorf("failed to shutdown config provider: %w", err)) } + if err := col.shutdownServiceAndTelemetry(ctx); err != nil { + errs = multierr.Append(errs, err) + } + + col.setCollectorState(Closed) + + return errs +} + +// shutdownServiceAndTelemetry bundles shutting down the service and telemetryInitializer. +// Returned error will be in multierr form and wrapped. +func (col *Collector) shutdownServiceAndTelemetry(ctx context.Context) error { + var errs error + + // shutdown service if err := col.service.Shutdown(ctx); err != nil { - errs = multierr.Append(errs, fmt.Errorf("failed to shutdown service: %w", err)) + errs = multierr.Append(err, fmt.Errorf("failed to shutdown service after error: %w", err)) } // TODO: Move this as part of the service shutdown. + // shutdown telemetryInitializer if err := col.service.telemetryInitializer.shutdown(); err != nil { errs = multierr.Append(errs, fmt.Errorf("failed to shutdown collector telemetry: %w", err)) } - - col.setCollectorState(Closed) - return errs } From 34c6954f241a9c42b0987602817786f7f7b144be Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Wed, 26 Oct 2022 08:02:47 -0400 Subject: [PATCH 7/8] PR Fixups Signed-off-by: Corbin Phelps --- service/collector.go | 8 ++------ service/service_test.go | 11 ++++------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/service/collector.go b/service/collector.go index 55b0df1f9a7..05fef997ee0 100644 --- a/service/collector.go +++ b/service/collector.go @@ -153,9 +153,7 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error { } if err = col.service.Start(ctx); err != nil { - if shutdownErr := col.shutdownServiceAndTelemetry(ctx); shutdownErr != nil { - return multierr.Append(err, shutdownErr) - } + return multierr.Append(err, col.shutdownServiceAndTelemetry(ctx)) } col.setCollectorState(Running) return nil @@ -221,9 +219,7 @@ func (col *Collector) shutdown(ctx context.Context) error { errs = multierr.Append(errs, fmt.Errorf("failed to shutdown config provider: %w", err)) } - if err := col.shutdownServiceAndTelemetry(ctx); err != nil { - errs = multierr.Append(errs, err) - } + errs = multierr.Append(errs, col.shutdownServiceAndTelemetry(ctx)) col.setCollectorState(Closed) diff --git a/service/service_test.go b/service/service_test.go index bcd5d2e7b8a..59ac56b000f 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -152,6 +152,10 @@ func TestServiceTelemetryReusable(t *testing.T) { // Create a service telemetry := newColTelemetry(featuregate.NewRegistry()) + // For safety ensure everything is cleaned up + t.Cleanup(func() { + assert.NoError(t, telemetry.shutdown()) + }) srvOne, err := newService(&settings{ BuildInfo: component.NewDefaultBuildInfo(), @@ -187,13 +191,6 @@ func TestServiceTelemetryReusable(t *testing.T) { }) require.NoError(t, err) - // For safety ensure everything is cleaned up - t.Cleanup(func() { - assert.NoError(t, telemetry.shutdown()) - assert.NoError(t, srvOne.Shutdown(context.Background())) - assert.NoError(t, srvTwo.Shutdown(context.Background())) - }) - // Start the new service require.NoError(t, srvTwo.Start(context.Background())) From af2d6e00659490b05dac82fa4f24170082727e1f Mon Sep 17 00:00:00 2001 From: Corbin Phelps Date: Wed, 26 Oct 2022 12:41:32 -0400 Subject: [PATCH 8/8] Update service/collector.go Co-authored-by: Bogdan Drutu --- service/collector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/collector.go b/service/collector.go index 05fef997ee0..149da9e1e0b 100644 --- a/service/collector.go +++ b/service/collector.go @@ -233,7 +233,7 @@ func (col *Collector) shutdownServiceAndTelemetry(ctx context.Context) error { // shutdown service if err := col.service.Shutdown(ctx); err != nil { - errs = multierr.Append(err, fmt.Errorf("failed to shutdown service after error: %w", err)) + errs = multierr.Append(errs, fmt.Errorf("failed to shutdown service after error: %w", err)) } // TODO: Move this as part of the service shutdown.