diff --git a/cmd/main.go b/cmd/main.go index d8cf6fe843..77865d29ec 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -17,6 +17,7 @@ limitations under the License. package main import ( + "context" "net/http" flag "github.com/spf13/pflag" @@ -39,6 +40,21 @@ func main() { options := GetOptions(fs) + // Start tracing as soon as possible + if options.ServerOptions.EnableOtelTracing { + exporter, err := driver.InitOtelTracing() + if err != nil { + klog.ErrorS(err, "failed to initialize otel tracing") + klog.FlushAndExit(klog.ExitFlushTimeout, 1) + } + // Exporter will flush traces on shutdown + defer func() { + if err := exporter.Shutdown(context.Background()); err != nil { + klog.ErrorS(err, "could not shutdown otel exporter") + } + }() + } + cloud.RegisterMetrics() if options.ServerOptions.HttpEndpoint != "" { mux := http.NewServeMux() @@ -62,6 +78,7 @@ func main() { driver.WithAwsSdkDebugLog(options.ControllerOptions.AwsSdkDebugLog), driver.WithWarnOnInvalidTag(options.ControllerOptions.WarnOnInvalidTag), driver.WithUserAgentExtra(options.ControllerOptions.UserAgentExtra), + driver.WithOtelTracing(options.ServerOptions.EnableOtelTracing), ) if err != nil { klog.ErrorS(err, "failed to create driver") diff --git a/cmd/options/server_options.go b/cmd/options/server_options.go index ddfbdaec72..52d6d7b49e 100644 --- a/cmd/options/server_options.go +++ b/cmd/options/server_options.go @@ -28,9 +28,12 @@ type ServerOptions struct { Endpoint string // HttpEndpoint is the endpoint that the HTTP server for metrics should listen on. HttpEndpoint string + // EnableOtelTracing enables opentelemetry tracing. + EnableOtelTracing bool } func (s *ServerOptions) AddFlags(fs *flag.FlagSet) { fs.StringVar(&s.Endpoint, "endpoint", driver.DefaultCSIEndpoint, "Endpoint for the CSI driver server") fs.StringVar(&s.HttpEndpoint, "http-endpoint", "", "The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.") + fs.BoolVar(&s.EnableOtelTracing, "enable-otel-tracing", false, "To enable opentelemetry tracing for the driver. The tracing is disabled by default. Configure the exporter endpoint with OTEL_EXPORTER_OTLP_ENDPOINT and other env variables, see https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration.") } diff --git a/cmd/options_test.go b/cmd/options_test.go index 783b6ac0bf..e87ad3d3b9 100644 --- a/cmd/options_test.go +++ b/cmd/options_test.go @@ -53,6 +53,8 @@ func TestGetOptions(t *testing.T) { var VolumeAttachLimit int64 = 42 userAgentExtraFlag := "user-agent-extra" userAgentExtraFlagValue := "test" + otelTracingFlagName := "enable-otel-tracing" + otelTracingFlagValue := true args := append([]string{ "aws-ebs-csi-driver", @@ -60,6 +62,7 @@ func TestGetOptions(t *testing.T) { if withServerOptions { args = append(args, "--"+endpointFlagName+"="+endpoint) + args = append(args, "--"+otelTracingFlagName+"="+strconv.FormatBool(otelTracingFlagValue)) } if withControllerOptions { args = append(args, "--"+extraTagsFlagName+"="+extraTagKey+"="+extraTagValue) @@ -83,6 +86,10 @@ func TestGetOptions(t *testing.T) { if options.ServerOptions.Endpoint != endpoint { t.Fatalf("expected endpoint to be %q but it is %q", endpoint, options.ServerOptions.Endpoint) } + otelTracingFlag := flagSet.Lookup(otelTracingFlagName) + if otelTracingFlag == nil { + t.Fatalf("expected %q flag to be added but it is not", otelTracingFlagName) + } } if withControllerOptions { diff --git a/docs/options.md b/docs/options.md index 01c4251153..3eb7663e50 100644 --- a/docs/options.md +++ b/docs/options.md @@ -1,4 +1,5 @@ # Driver Options + There are a couple of driver options that can be passed as arguments when starting the driver container. | Option argument | value sample | default | Description | @@ -10,3 +11,4 @@ There are a couple of driver options that can be passed as arguments when starti | aws-sdk-debug-log | true | false | If set to true, the driver will enable the aws sdk debug log level| | logging-format | json | text | Sets the log format. Permitted formats: text, json| | user-agent-extra | csi-ebs | helm | Extra string appended to user agent| +| enable-otel-tracing | true | false | If set to true, the driver will enable opentelemetry tracing. Might need [additional env variables](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration) to export the traces to the right collector| diff --git a/go.mod b/go.mod index b63e1e8e23..a73c0463b7 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,11 @@ require ( github.com/onsi/gomega v1.27.8 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.41.1 + go.opentelemetry.io/otel v1.15.1 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.15.1 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.15.1 + go.opentelemetry.io/otel/sdk v1.15.1 golang.org/x/sys v0.11.0 google.golang.org/grpc v1.56.2 google.golang.org/protobuf v1.31.0 @@ -89,14 +94,9 @@ require ( go.etcd.io/etcd/api/v3 v3.5.9 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.9 // indirect go.etcd.io/etcd/client/v3 v3.5.9 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.41.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.41.1 // indirect - go.opentelemetry.io/otel v1.15.1 // indirect go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.15.1 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.15.1 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.15.1 // indirect go.opentelemetry.io/otel/metric v0.38.1 // indirect - go.opentelemetry.io/otel/sdk v1.15.1 // indirect go.opentelemetry.io/otel/trace v1.15.1 // indirect go.opentelemetry.io/proto/otlp v0.19.0 // indirect go.uber.org/atomic v1.11.0 // indirect diff --git a/pkg/driver/driver.go b/pkg/driver/driver.go index 394554386c..d92964af96 100644 --- a/pkg/driver/driver.go +++ b/pkg/driver/driver.go @@ -24,6 +24,7 @@ import ( "github.com/awslabs/volume-modifier-for-k8s/pkg/rpc" csi "github.com/container-storage-interface/spec/lib/go/csi" "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/util" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "k8s.io/klog/v2" ) @@ -69,6 +70,7 @@ type DriverOptions struct { awsSdkDebugLog bool warnOnInvalidTag bool userAgentExtra string + otelTracing bool } func NewDriver(options ...func(*DriverOptions)) (*Driver, error) { @@ -123,8 +125,14 @@ func (d *Driver) Run() error { } return resp, err } + + grpcInterceptor := grpc.UnaryInterceptor(logErr) + if d.options.otelTracing { + grpcInterceptor = grpc.ChainUnaryInterceptor(logErr, otelgrpc.UnaryServerInterceptor()) + } + opts := []grpc.ServerOption{ - grpc.UnaryInterceptor(logErr), + grpcInterceptor, } d.srv = grpc.NewServer(opts...) @@ -208,3 +216,9 @@ func WithUserAgentExtra(userAgentExtra string) func(*DriverOptions) { o.userAgentExtra = userAgentExtra } } + +func WithOtelTracing(enableOtelTracing bool) func(*DriverOptions) { + return func(o *DriverOptions) { + o.otelTracing = enableOtelTracing + } +} diff --git a/pkg/driver/driver_test.go b/pkg/driver/driver_test.go index 239e1a28ad..e98229ae0b 100644 --- a/pkg/driver/driver_test.go +++ b/pkg/driver/driver_test.go @@ -103,3 +103,12 @@ func TestWithUserAgentExtra(t *testing.T) { t.Fatalf("expected userAgentExtra option got set to %s but is set to %s", userAgentExtra, options.userAgentExtra) } } + +func TestWithOtelTracing(t *testing.T) { + var enableOtelTracing bool = true + options := &DriverOptions{} + WithOtelTracing(enableOtelTracing)(options) + if options.otelTracing != enableOtelTracing { + t.Fatalf("expected otelTracing option got set to %v but is set to %v", enableOtelTracing, options.otelTracing) + } +} diff --git a/pkg/driver/trace.go b/pkg/driver/trace.go new file mode 100644 index 0000000000..19e3bdb3b9 --- /dev/null +++ b/pkg/driver/trace.go @@ -0,0 +1,56 @@ +/* +Copyright 2023 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package driver + +import ( + "context" + "fmt" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + "k8s.io/klog/v2" +) + +func InitOtelTracing() (*otlptrace.Exporter, error) { + // Setup OTLP exporter + ctx := context.Background() + exporter, err := otlptracegrpc.New(ctx) + if err != nil { + return nil, fmt.Errorf("failed to create the OTLP exporter: %w", err) + } + + // Resource will auto populate spans with common attributes + resource, err := resource.New(ctx, + resource.WithFromEnv(), // pull attributes from OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME environment variables + resource.WithProcess(), + resource.WithOS(), + resource.WithContainer(), + resource.WithHost(), + ) + if err != nil { + klog.ErrorS(err, "failed to create the OTLP resource, spans will lack some metadata") + } + + // Create a trace provider with the exporter. + // Use propagator and sampler defined in environment variables. + traceProvider := trace.NewTracerProvider(trace.WithBatcher(exporter), trace.WithResource(resource)) + + // Register the trace provider as global. + otel.SetTracerProvider(traceProvider) + + return exporter, nil +}