Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added cleanup if service errors on Start #6351

Merged
merged 8 commits into from
Oct 26, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .chloggen/service-cleanup-on-start-error.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: collector

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fixed collector service not cleaning up if it failed during Start

# One or more tracking issues or pull requests related to the change
issues: [6352]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
21 changes: 16 additions & 5 deletions service/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error {
}

if err = col.service.Start(ctx); err != nil {
return err
return multierr.Append(err, col.shutdownServiceAndTelemetry(ctx))
}
col.setCollectorState(Running)
return nil
Expand Down Expand Up @@ -219,17 +219,28 @@ func (col *Collector) shutdown(ctx context.Context) error {
errs = multierr.Append(errs, fmt.Errorf("failed to shutdown config provider: %w", err))
}

errs = multierr.Append(errs, col.shutdownServiceAndTelemetry(ctx))

col.setCollectorState(Closed)

return errs
}

// shutdownServiceAndTelemetry bundles shutting down the service and telemetryInitializer.
// Returned error will be in multierr form and wrapped.
func (col *Collector) shutdownServiceAndTelemetry(ctx context.Context) error {
var errs error

// shutdown service
if err := col.service.Shutdown(ctx); err != nil {
errs = multierr.Append(errs, fmt.Errorf("failed to shutdown service: %w", err))
errs = multierr.Append(err, fmt.Errorf("failed to shutdown service after error: %w", err))
cpheps marked this conversation as resolved.
Show resolved Hide resolved
}

// TODO: Move this as part of the service shutdown.
// shutdown telemetryInitializer
if err := col.service.telemetryInitializer.shutdown(); err != nil {
errs = multierr.Append(errs, fmt.Errorf("failed to shutdown collector telemetry: %w", err))
}

col.setCollectorState(Closed)

return errs
}

Expand Down
1 change: 1 addition & 0 deletions service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ func newService(set *settings) (*service, error) {
return srv, nil
}

// Start starts the extensions and pipelines. If Start fails Shutdown should be called to ensure a clean state.
func (srv *service) Start(ctx context.Context) error {
srv.telemetrySettings.Logger.Info("Starting "+srv.buildInfo.Command+"...",
zap.String("Version", srv.buildInfo.Version),
Expand Down
67 changes: 67 additions & 0 deletions service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package service

import (
"context"
"fmt"
"net/http"
"path/filepath"
"testing"

Expand Down Expand Up @@ -137,6 +139,71 @@ func TestServiceTelemetryCleanupOnError(t *testing.T) {

}

// TestServiceTelemetryReusable tests that a single telemetryInitializer can be reused in multiple services
func TestServiceTelemetryReusable(t *testing.T) {
factories, err := componenttest.NopFactories()
require.NoError(t, err)

// Read valid yaml config from file
validProvider, err := NewConfigProvider(newDefaultConfigProviderSettings([]string{filepath.Join("testdata", "otelcol-nop.yaml")}))
require.NoError(t, err)
validCfg, err := validProvider.Get(context.Background(), factories)
require.NoError(t, err)

// Create a service
telemetry := newColTelemetry(featuregate.NewRegistry())
// For safety ensure everything is cleaned up
t.Cleanup(func() {
assert.NoError(t, telemetry.shutdown())
})

srvOne, err := newService(&settings{
BuildInfo: component.NewDefaultBuildInfo(),
Factories: factories,
Config: validCfg,
telemetry: telemetry,
})
require.NoError(t, err)

// URL of the telemetry service metrics endpoint
telemetryURL := fmt.Sprintf("http://%s/metrics", telemetry.server.Addr)

// Start the service
require.NoError(t, srvOne.Start(context.Background()))

// check telemetry server to ensure we get a response
var resp *http.Response

// #nosec G107
resp, err = http.Get(telemetryURL)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)

// Shutdown the service
require.NoError(t, srvOne.Shutdown(context.Background()))

// Create a new service with the same telemetry
srvTwo, err := newService(&settings{
BuildInfo: component.NewDefaultBuildInfo(),
Factories: factories,
Config: validCfg,
telemetry: telemetry,
})
require.NoError(t, err)

// Start the new service
require.NoError(t, srvTwo.Start(context.Background()))

// check telemetry server to ensure we get a response
// #nosec G107
resp, err = http.Get(telemetryURL)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)

// Shutdown the new service
require.NoError(t, srvTwo.Shutdown(context.Background()))
}

func createExampleService(t *testing.T, factories component.Factories) *service {
// Read yaml config from file
prov, err := NewConfigProvider(newDefaultConfigProviderSettings([]string{filepath.Join("testdata", "otelcol-nop.yaml")}))
Expand Down