-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
service.go
364 lines (306 loc) · 12.5 KB
/
service.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
//go:generate mdatagen metadata.yaml
package service // import "go.opentelemetry.io/collector/service"
import (
"context"
"errors"
"fmt"
"runtime"
"go.opentelemetry.io/contrib/config"
"go.opentelemetry.io/otel/log"
"go.opentelemetry.io/otel/metric"
sdkresource "go.opentelemetry.io/otel/sdk/resource"
"go.uber.org/multierr"
"go.uber.org/zap"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/config/configtelemetry"
"go.opentelemetry.io/collector/confmap"
"go.opentelemetry.io/collector/connector"
"go.opentelemetry.io/collector/exporter"
"go.opentelemetry.io/collector/extension"
"go.opentelemetry.io/collector/featuregate"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/processor"
"go.opentelemetry.io/collector/receiver"
semconv "go.opentelemetry.io/collector/semconv/v1.26.0"
"go.opentelemetry.io/collector/service/extensions"
"go.opentelemetry.io/collector/service/internal/builders"
"go.opentelemetry.io/collector/service/internal/graph"
"go.opentelemetry.io/collector/service/internal/proctelemetry"
"go.opentelemetry.io/collector/service/internal/resource"
"go.opentelemetry.io/collector/service/internal/status"
"go.opentelemetry.io/collector/service/telemetry"
)
// Registers the "telemetry.useOtelWithSDKConfigurationForInternalTelemetry" feature gate, which controls
// whether the collector supports configuring the OpenTelemetry SDK via configuration.
// The gate is registered at the stable stage; the returned gate is discarded because nothing in this
// file reads it.
var _ = featuregate.GlobalRegistry().MustRegister(
	"telemetry.useOtelWithSDKConfigurationForInternalTelemetry",
	featuregate.StageStable,
	featuregate.WithRegisterToVersion("v0.110.0"),
	// The trailing space keeps the two concatenated literals from running together
	// ("OpenTelemetry configuration", not "OpenTelemetryconfiguration").
	featuregate.WithRegisterDescription("controls whether the collector supports extended OpenTelemetry "+
		"configuration for internal telemetry"))
// Settings holds configuration for building a new Service.
// New reads these values to construct the component builders and the graph.Host.
type Settings struct {
// BuildInfo provides collector start information.
// New copies it onto both the Service and its host.
BuildInfo component.BuildInfo
// CollectorConf contains the Collector's current configuration.
// When it is non-nil, Start forwards it to the service extensions via NotifyConfig.
CollectorConf *confmap.Conf
// ReceiversConfigs and ReceiversFactories are the receiver configurations (keyed by component ID)
// and factories (keyed by component type) that New passes to builders.NewReceiver.
ReceiversConfigs map[component.ID]component.Config
ReceiversFactories map[component.Type]receiver.Factory
// ProcessorsConfigs and ProcessorsFactories are the processor configurations and factories
// that New passes to builders.NewProcessor.
ProcessorsConfigs map[component.ID]component.Config
ProcessorsFactories map[component.Type]processor.Factory
// ExportersConfigs and ExportersFactories are the exporter configurations and factories
// that New passes to builders.NewExporter.
ExportersConfigs map[component.ID]component.Config
ExportersFactories map[component.Type]exporter.Factory
// ConnectorsConfigs and ConnectorsFactories are the connector configurations and factories
// that New passes to builders.NewConnector.
ConnectorsConfigs map[component.ID]component.Config
ConnectorsFactories map[component.Type]connector.Factory
// Extensions builder for extensions.
// NOTE(review): New in this file does not read this field; it builds the extension builder from
// ExtensionsConfigs and ExtensionsFactories instead. Confirm whether this field is still needed.
Extensions builders.Extension
// ExtensionsConfigs and ExtensionsFactories are the extension configurations and factories
// that New passes to builders.NewExtension.
ExtensionsConfigs map[component.ID]component.Config
ExtensionsFactories map[component.Type]extension.Factory
// ModuleInfo describes the go module for each component.
// New stores it on the host and initExtensions passes it on to the extensions settings.
ModuleInfo extension.ModuleInfo
// AsyncErrorChannel is the channel that is used to report fatal errors.
// New stores it on the host.
AsyncErrorChannel chan error
// LoggingOptions provides a way to change behavior of zap logging.
// New passes them as the ZapOptions of the telemetry settings used to create the logger.
LoggingOptions []zap.Option
}
// Service represents the implementation of a component.Host.
// It is created by New, run with Start and stopped with Shutdown.
type Service struct {
// buildInfo is the collector build information taken from Settings.BuildInfo.
buildInfo component.BuildInfo
// telemetrySettings carries the logger, meter provider, tracer provider, metrics level
// and resource that New builds for the service's own telemetry.
telemetrySettings component.TelemetrySettings
// host holds the component builders and, once New has run, the pipelines,
// service extensions and status reporter.
host *graph.Host
// collectorConf is the collector configuration forwarded to extensions during Start; it may be nil.
collectorConf *confmap.Conf
// loggerProvider is retained so shutdownTelemetry can shut it down if it exposes a Shutdown method.
loggerProvider log.LoggerProvider
}
// New creates a new Service, its telemetry, and Components.
//
// It wires the component builders described by set into a graph.Host, creates the
// OpenTelemetry SDK from cfg.Telemetry, derives the logger, tracer provider and meter
// provider from it, and then builds the pipeline graph and the extensions. Nothing is
// started here; call Start on the returned Service.
//
// On failure the returned Service is nil. Once the telemetry providers have been stored
// on the Service, every later failure path shuts them down before returning, and any
// shutdown error is appended to the returned error.
func New(ctx context.Context, set Settings, cfg Config) (*Service, error) {
	srv := &Service{
		buildInfo: set.BuildInfo,
		host: &graph.Host{
			Receivers:         builders.NewReceiver(set.ReceiversConfigs, set.ReceiversFactories),
			Processors:        builders.NewProcessor(set.ProcessorsConfigs, set.ProcessorsFactories),
			Exporters:         builders.NewExporter(set.ExportersConfigs, set.ExportersFactories),
			Connectors:        builders.NewConnector(set.ConnectorsConfigs, set.ConnectorsFactories),
			Extensions:        builders.NewExtension(set.ExtensionsConfigs, set.ExtensionsFactories),
			ModuleInfo:        set.ModuleInfo,
			BuildInfo:         set.BuildInfo,
			AsyncErrorChannel: set.AsyncErrorChannel,
		},
		collectorConf: set.CollectorConf,
	}

	// Fetch data for internal telemetry like instance id and sdk version to provide for internal telemetry.
	res := resource.New(set.BuildInfo, cfg.Telemetry.Resource)
	pcommonRes := pdataFromSdk(res)

	sch := semconv.SchemaURL
	cfgRes := config.Resource{
		SchemaUrl:  &sch,
		Attributes: attributes(res, cfg.Telemetry),
	}

	sdk, err := config.NewSDK(
		config.WithContext(ctx),
		config.WithOpenTelemetryConfiguration(
			config.OpenTelemetryConfiguration{
				LoggerProvider: &config.LoggerProvider{
					Processors: cfg.Telemetry.Logs.Processors,
				},
				TracerProvider: &config.TracerProvider{
					Processors: cfg.Telemetry.Traces.Processors,
				},
				Resource: &cfgRes,
			},
		),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create SDK: %w", err)
	}

	telFactory := telemetry.NewFactory()
	telset := telemetry.Settings{
		BuildInfo:  set.BuildInfo,
		ZapOptions: set.LoggingOptions,
		SDK:        &sdk,
	}

	logger, lp, err := telFactory.CreateLogger(ctx, telset, &cfg.Telemetry)
	if err != nil {
		return nil, fmt.Errorf("failed to create logger: %w", err)
	}
	// Kept on the service so shutdownTelemetry can shut the logger provider down later.
	srv.loggerProvider = lp

	tracerProvider, err := telFactory.CreateTracerProvider(ctx, telset, &cfg.Telemetry)
	if err != nil {
		return nil, fmt.Errorf("failed to create tracer provider: %w", err)
	}

	logger.Info("Setting up own telemetry...")

	mp, err := telFactory.CreateMeterProvider(ctx, telset, &cfg.Telemetry)
	if err != nil {
		return nil, fmt.Errorf("failed to create meter provider: %w", err)
	}

	logsAboutMeterProvider(logger, cfg.Telemetry.Metrics, mp)
	srv.telemetrySettings = component.TelemetrySettings{
		Logger:         logger,
		MeterProvider:  mp,
		TracerProvider: tracerProvider,
		MetricsLevel:   cfg.Telemetry.Metrics.Level,
		// Construct telemetry attributes from build info and config's resource attributes.
		Resource: pcommonRes,
	}
	srv.host.Reporter = status.NewReporter(srv.host.NotifyComponentStatusChange, func(err error) {
		if errors.Is(err, status.ErrStatusNotReady) {
			logger.Warn("Invalid transition", zap.Error(err))
		}
		// ignore other errors as they represent invalid state transitions and are considered benign.
	})

	// process the configuration and initialize the pipeline
	if err = srv.initGraph(ctx, cfg); err != nil {
		err = multierr.Append(err, srv.shutdownTelemetry(ctx))
		return nil, err
	}

	// build the extensions configured for the service
	if err = srv.initExtensions(ctx, cfg.Extensions); err != nil {
		err = multierr.Append(err, srv.shutdownTelemetry(ctx))
		return nil, err
	}

	if err = proctelemetry.RegisterProcessMetrics(srv.telemetrySettings); err != nil {
		// Shut telemetry down here as well, matching the two failure paths above;
		// otherwise the providers created earlier would outlive the failed construction.
		err = fmt.Errorf("failed to register process metrics: %w", err)
		return nil, multierr.Append(err, srv.shutdownTelemetry(ctx))
	}
	return srv, nil
}
// logsAboutMeterProvider writes the start-up log lines that describe internal metrics.
//
// If the metrics level is none or no readers are configured, it only records that
// telemetry setup was skipped. Otherwise it warns when the address setting is present
// and, when mp offers a LogAboutServers method, hands the remaining logging over to it.
func logsAboutMeterProvider(logger *zap.Logger, cfg telemetry.MetricsConfig, mp metric.MeterProvider) {
	// Optional capability: not every meter provider can describe its servers.
	type serverDescriber interface {
		LogAboutServers(logger *zap.Logger, cfg telemetry.MetricsConfig)
	}

	telemetryEnabled := cfg.Level != configtelemetry.LevelNone && len(cfg.Readers) > 0
	if !telemetryEnabled {
		logger.Info("Skipped telemetry setup.")
		return
	}

	//nolint
	if len(cfg.Address) > 0 {
		logger.Warn("service::telemetry::metrics::address is being deprecated in favor of service::telemetry::metrics::readers")
	}

	describer, ok := mp.(serverDescriber)
	if !ok {
		return
	}
	describer.LogAboutServers(logger, cfg)
}
// Start brings up the extensions and then the pipelines. When Start returns an error,
// the caller should invoke Shutdown so the service is left in a clean state.
// The sequence is:
//  1. Start all extensions.
//  2. Notify extensions about the Collector configuration (only when one was provided).
//  3. Start all pipelines.
//  4. Notify extensions that the pipeline is ready.
func (srv *Service) Start(ctx context.Context) error {
	logger := srv.telemetrySettings.Logger
	banner := "Starting " + srv.buildInfo.Command + "..."
	logger.Info(banner,
		zap.String("Version", srv.buildInfo.Version),
		zap.Int("NumCPU", runtime.NumCPU()),
	)

	startErr := srv.host.ServiceExtensions.Start(ctx, srv.host)
	if startErr != nil {
		return fmt.Errorf("failed to start extensions: %w", startErr)
	}

	if conf := srv.collectorConf; conf != nil {
		notifyErr := srv.host.ServiceExtensions.NotifyConfig(ctx, conf)
		if notifyErr != nil {
			return notifyErr
		}
	}

	pipelinesErr := srv.host.Pipelines.StartAll(ctx, srv.host)
	if pipelinesErr != nil {
		return fmt.Errorf("cannot start pipelines: %w", pipelinesErr)
	}

	readyErr := srv.host.ServiceExtensions.NotifyPipelineReady()
	if readyErr != nil {
		return readyErr
	}

	logger.Info("Everything is ready. Begin running and processing data.")
	return nil
}
// shutdownTelemetry shuts down the meter, tracer and logger providers, in that order,
// and returns the combination of every shutdown error (nil when none failed).
// A provider that does not expose a Shutdown method is skipped.
func (srv *Service) shutdownTelemetry(ctx context.Context) error {
	// The metric.MeterProvider and trace.TracerProvider interfaces carry no Shutdown method,
	// so each provider is probed for one with the same signature the SDK uses.
	type shutdowner interface {
		Shutdown(context.Context) error
	}

	targets := []struct {
		label    string
		provider any
	}{
		{label: "meter provider", provider: srv.telemetrySettings.MeterProvider},
		{label: "tracer provider", provider: srv.telemetrySettings.TracerProvider},
		{label: "logger provider", provider: srv.loggerProvider},
	}

	var combined error
	for _, target := range targets {
		closer, ok := target.provider.(shutdowner)
		if !ok {
			continue
		}
		if shutdownErr := closer.Shutdown(ctx); shutdownErr != nil {
			combined = multierr.Append(combined, fmt.Errorf("failed to shutdown %s: %w", target.label, shutdownErr))
		}
	}
	return combined
}
// Shutdown stops the service. A failing step does not abort the sequence; its error is
// collected and the remaining steps still run. The steps, in order:
//  1. Notify extensions that the pipeline is shutting down.
//  2. Shutdown all pipelines.
//  3. Shutdown all extensions.
//  4. Shutdown telemetry.
//
// The returned error combines every failure encountered, or is nil when all steps succeeded.
func (srv *Service) Shutdown(ctx context.Context) error {
	var combined error

	logger := srv.telemetrySettings.Logger
	logger.Info("Starting shutdown...")

	notReadyErr := srv.host.ServiceExtensions.NotifyPipelineNotReady()
	if notReadyErr != nil {
		combined = multierr.Append(combined, fmt.Errorf("failed to notify that pipeline is not ready: %w", notReadyErr))
	}

	pipelinesErr := srv.host.Pipelines.ShutdownAll(ctx, srv.host.Reporter)
	if pipelinesErr != nil {
		combined = multierr.Append(combined, fmt.Errorf("failed to shutdown pipelines: %w", pipelinesErr))
	}

	extensionsErr := srv.host.ServiceExtensions.Shutdown(ctx)
	if extensionsErr != nil {
		combined = multierr.Append(combined, fmt.Errorf("failed to shutdown extensions: %w", extensionsErr))
	}

	// Logged before telemetry goes down, since the providers are shut down in the next step.
	logger.Info("Shutdown complete.")

	return multierr.Append(combined, srv.shutdownTelemetry(ctx))
}
// initExtensions builds the service extensions from cfg and stores the result on the host.
// The host field is assigned whatever extensions.New returns, even when it also returns an error;
// in that case the error is wrapped and returned.
func (srv *Service) initExtensions(ctx context.Context, cfg extensions.Config) error {
	settings := extensions.Settings{
		Telemetry:  srv.telemetrySettings,
		BuildInfo:  srv.buildInfo,
		Extensions: srv.host.Extensions,
		ModuleInfo: srv.host.ModuleInfo,
	}

	built, err := extensions.New(ctx, settings, cfg, extensions.WithReporter(srv.host.Reporter))
	srv.host.ServiceExtensions = built
	if err != nil {
		return fmt.Errorf("failed to build extensions: %w", err)
	}
	return nil
}
// initGraph builds the pipeline graph from cfg.Pipelines using the host's component builders
// and stores the result on the host. The host field is assigned whatever graph.Build returns,
// even when it also returns an error; in that case the error is wrapped and returned.
func (srv *Service) initGraph(ctx context.Context, cfg Config) error {
	graphSettings := graph.Settings{
		Telemetry:        srv.telemetrySettings,
		BuildInfo:        srv.buildInfo,
		ReceiverBuilder:  srv.host.Receivers,
		ProcessorBuilder: srv.host.Processors,
		ExporterBuilder:  srv.host.Exporters,
		ConnectorBuilder: srv.host.Connectors,
		PipelineConfigs:  cfg.Pipelines,
		ReportStatus:     srv.host.Reporter.ReportStatus,
	}

	pipelines, err := graph.Build(ctx, graphSettings)
	srv.host.Pipelines = pipelines
	if err != nil {
		return fmt.Errorf("failed to build pipelines: %w", err)
	}
	return nil
}
// Logger exposes the zap logger held in this service's telemetry settings.
// This is a temporary API that may be removed soon after investigating how the collector should record different events.
func (srv *Service) Logger() *zap.Logger {
	logger := srv.telemetrySettings.Logger
	return logger
}
// pdataFromSdk copies the attributes of an SDK resource into a freshly created pcommon.Resource.
// Every attribute value is stored as its string form, keyed by the attribute key.
func pdataFromSdk(res *sdkresource.Resource) pcommon.Resource {
	// A brand-new, signal-agnostic resource is needed here, and pcommon.NewResource is currently
	// the only way to obtain one without exposing internal packages; it is safe outside of tests.
	converted := pcommon.NewResource()
	target := converted.Attributes()

	source := res.Attributes()
	for i := range source {
		target.PutStr(string(source[i].Key), source[i].Value.AsString())
	}
	return converted
}