Skip to content

Commit

Permalink
Add grpc server and client metrics to Teleport (#11773)
Browse files Browse the repository at this point in the history
Add grpc server and client metrics to Teleport with optional latency metrics that can be enabled via metrics_service
  • Loading branch information
rcanderson23 authored Apr 7, 2022
1 parent 9e7408a commit 4bba628
Show file tree
Hide file tree
Showing 11 changed files with 254 additions and 6 deletions.
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ require (
github.com/gravitational/teleport/api v0.0.0
github.com/gravitational/trace v1.1.17
github.com/gravitational/ttlmap v0.0.0-20171116003245-91fd36b9004c
github.com/grpc-ecosystem/go-grpc-middleware/providers/openmetrics/v2 v2.0.0-20220308023801-e4a6915ea237
github.com/hashicorp/golang-lru v0.5.4
github.com/jackc/pgconn v1.11.0
github.com/jackc/pgerrcode v0.0.0-20201024163028-a0d42d470451
Expand Down Expand Up @@ -159,6 +160,8 @@ require (
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c // indirect
github.com/gorilla/handlers v1.5.1 // indirect
github.com/gorilla/mux v1.8.0 // indirect
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2.0.20220308023801-e4a6915ea237 // indirect
github.com/hashicorp/go-uuid v1.0.2 // indirect
github.com/imdario/mergo v0.3.5 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
Expand Down
138 changes: 136 additions & 2 deletions go.sum

Large diffs are not rendered by default.

38 changes: 37 additions & 1 deletion lib/auth/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import (
"github.com/gravitational/teleport/lib/utils"

"github.com/gravitational/trace"
om "github.com/grpc-ecosystem/go-grpc-middleware/providers/openmetrics/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"golang.org/x/net/http2"
"google.golang.org/grpc"
Expand All @@ -62,6 +64,8 @@ type TLSServerConfig struct {
AcceptedUsage []string
// ID is an optional debugging ID
ID string
// Metrics are optional TLSServer metrics
Metrics *Metrics
}

// CheckAndSetDefaults checks and sets default values
Expand Down Expand Up @@ -91,9 +95,17 @@ func (c *TLSServerConfig) CheckAndSetDefaults() error {
if c.Component == "" {
c.Component = teleport.ComponentAuth
}
if c.Metrics == nil {
c.Metrics = &Metrics{}
}
return nil
}

// Metrics handles optional metrics for TLSServerConfig
type Metrics struct {
// GRPCServerLatency is passed by NewTLSServer to utils.CreateGRPCServerMetrics;
// when true, the gRPC server handling-time histogram is added to the
// server metrics in addition to the counters.
GRPCServerLatency bool
}

// TLSServer is TLS auth server
type TLSServer struct {
// httpServer is HTTP/1.1 part of the server
Expand All @@ -120,13 +132,21 @@ func NewTLSServer(cfg TLSServerConfig) (*TLSServer, error) {
if err != nil {
return nil, trace.Wrap(err)
}

// sets up grpc metrics interceptor
grpcMetrics := utils.CreateGRPCServerMetrics(cfg.Metrics.GRPCServerLatency, prometheus.Labels{teleport.TagServer: "teleport-auth"})
err = utils.RegisterPrometheusCollectors(grpcMetrics)
if err != nil {
return nil, trace.Wrap(err)
}
// authMiddleware authenticates request assuming TLS client authentication
// adds authentication information to the context
// and passes it to the API server
authMiddleware := &Middleware{
AccessPoint: cfg.AccessPoint,
AcceptedUsage: cfg.AcceptedUsage,
Limiter: limiter,
GRPCMetrics: grpcMetrics,
}

apiServer, err := NewAPIServer(&cfg.APIConfig)
Expand Down Expand Up @@ -310,6 +330,8 @@ type Middleware struct {
AcceptedUsage []string
// Limiter is a rate and connection limiter
Limiter *limiter.Limiter
// GRPCMetrics is the configured grpc metrics for the interceptors
GRPCMetrics *om.ServerMetrics
}

// Wrap sets next handler in chain
Expand Down Expand Up @@ -381,16 +403,30 @@ func (a *Middleware) withAuthenticatedUserStreamInterceptor(srv interface{}, ser
// limiting, authenticates requests, and passes the user information as context
// metadata.
func (a *Middleware) UnaryInterceptor() grpc.UnaryServerInterceptor {
	if a.GRPCMetrics == nil {
		// No gRPC metrics configured: chain error conversion, rate limiting
		// and authentication only.
		return utils.ChainUnaryServerInterceptors(
			utils.ErrorConvertUnaryInterceptor,
			a.Limiter.UnaryServerInterceptorWithCustomRate(getCustomRate),
			a.withAuthenticatedUserUnaryInterceptor,
		)
	}

	// Metrics are configured: the openmetrics interceptor is listed first in
	// the chain, followed by the same three interceptors as above.
	metricsInterceptor := om.UnaryServerInterceptor(a.GRPCMetrics)
	rateLimiter := a.Limiter.UnaryServerInterceptorWithCustomRate(getCustomRate)
	return utils.ChainUnaryServerInterceptors(
		metricsInterceptor,
		utils.ErrorConvertUnaryInterceptor,
		rateLimiter,
		a.withAuthenticatedUserUnaryInterceptor,
	)
}

// UnaryInterceptor returns a gPRC stream interceptor which performs rate
// StreamInterceptor returns a gRPC stream interceptor which performs rate
// limiting, authenticates requests, and passes the user information as context
// metadata.
func (a *Middleware) StreamInterceptor() grpc.StreamServerInterceptor {
if a.GRPCMetrics != nil {
return utils.ChainStreamServerInterceptors(
om.StreamServerInterceptor(a.GRPCMetrics),
utils.ErrorConvertStreamInterceptor,
a.Limiter.StreamServerInterceptor,
a.withAuthenticatedUserStreamInterceptor)
}
return utils.ChainStreamServerInterceptors(
utils.ErrorConvertStreamInterceptor,
a.Limiter.StreamServerInterceptor,
Expand Down
3 changes: 3 additions & 0 deletions lib/config/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,9 @@ func applyMetricsConfig(fc *FileConfig, cfg *service.Config) error {
}
cfg.Metrics.ListenAddr = addr

cfg.Metrics.GRPCServerLatency = fc.Metrics.GRPCServerLatency
cfg.Metrics.GRPCClientLatency = fc.Metrics.GRPCClientLatency

if !fc.Metrics.MTLSEnabled() {
return nil
}
Expand Down
6 changes: 5 additions & 1 deletion lib/config/configuration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ func TestConfigReading(t *testing.T) {
Certificate: "/etc/teleport/proxy.crt",
},
},
CACerts: []string{"/etc/teleport/ca.crt"},
CACerts: []string{"/etc/teleport/ca.crt"},
GRPCServerLatency: true,
GRPCClientLatency: true,
},
WindowsDesktop: WindowsDesktopService{
Service: Service{
Expand Down Expand Up @@ -1162,6 +1164,8 @@ func makeConfigFixture() string {
// Metrics service.
conf.Metrics.EnabledFlag = "yes"
conf.Metrics.ListenAddress = "tcp://metrics"
conf.Metrics.GRPCServerLatency = true
conf.Metrics.GRPCClientLatency = true
conf.Metrics.CACerts = []string{"/etc/teleport/ca.crt"}
conf.Metrics.KeyPairs = []KeyPair{
{
Expand Down
6 changes: 6 additions & 0 deletions lib/config/fileconf.go
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,12 @@ type Metrics struct {
// CACerts is a list of prometheus CA certificates to validate clients against.
// mTLS will be enabled for the service if both 'keypairs' and 'ca_certs' fields are set.
CACerts []string `yaml:"ca_certs,omitempty"`

// GRPCServerLatency enables histogram metrics for each grpc endpoint on the auth server
GRPCServerLatency bool `yaml:"grpc_server_latency,omitempty"`

// GRPCClientLatency enables latency histogram metrics on the grpc client used to connect to the auth server
GRPCClientLatency bool `yaml:"grpc_client_latency,omitempty"`
}

// MTLSEnabled returns whether mtls is enabled or not in the metrics service config.
Expand Down
6 changes: 6 additions & 0 deletions lib/service/cfg.go
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,12 @@ type MetricsConfig struct {
// use for mTLS.
// Used in conjunction with MTLS = true
CACerts []string

// GRPCServerLatency enables histogram metrics for each grpc endpoint on the auth server
GRPCServerLatency bool

// GRPCClientLatency enables latency histogram metrics on the grpc client used to connect to the auth server
GRPCClientLatency bool
}

// WindowsDesktopConfig specifies the configuration for the Windows Desktop
Expand Down
22 changes: 20 additions & 2 deletions lib/service/connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,16 @@ import (
"github.com/coreos/go-semver/semver"
"github.com/gravitational/roundtrip"
"github.com/gravitational/trace"
om "github.com/grpc-ecosystem/go-grpc-middleware/providers/openmetrics/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"golang.org/x/crypto/ssh"
"google.golang.org/grpc"

"github.com/gravitational/teleport"
apiclient "github.com/gravitational/teleport/api/client"
"github.com/gravitational/teleport/api/client/proto"
"github.com/gravitational/teleport/api/metadata"
"github.com/gravitational/teleport/api/types"
"github.com/gravitational/teleport/lib"
"github.com/gravitational/teleport/lib/auth"
Expand Down Expand Up @@ -857,7 +861,8 @@ func (process *TeleportProcess) newClient(authServers []utils.NetAddr, identity

logger := process.log.WithField("auth-addrs", utils.NetAddrsToStrings(authServers))
logger.Debug("Attempting to connect to Auth Server directly.")
directClient, directErr := process.newClientDirect(authServers, tlsConfig)

directClient, directErr := process.newClientDirect(authServers, tlsConfig, identity.ID.Role)
if directErr == nil {
logger.Debug("Connected to Auth Server with direct connection.")
return directClient, nil
Expand Down Expand Up @@ -931,17 +936,30 @@ func (process *TeleportProcess) newClientThroughTunnel(authServers []utils.NetAd
return clt, nil
}

func (process *TeleportProcess) newClientDirect(authServers []utils.NetAddr, tlsConfig *tls.Config) (*auth.Client, error) {
func (process *TeleportProcess) newClientDirect(authServers []utils.NetAddr, tlsConfig *tls.Config, role types.SystemRole) (*auth.Client, error) {
var cltParams []roundtrip.ClientParam
if process.Config.ClientTimeout != 0 {
cltParams = []roundtrip.ClientParam{auth.ClientTimeout(process.Config.ClientTimeout)}
}

var dialOpts []grpc.DialOption
if role == types.RoleProxy {
grpcMetrics := utils.CreateGRPCClientMetrics(process.Config.Metrics.GRPCClientLatency, prometheus.Labels{teleport.TagClient: "teleport-proxy"})
if err := utils.RegisterPrometheusCollectors(grpcMetrics); err != nil {
return nil, trace.Wrap(err)
}
dialOpts = append(dialOpts, []grpc.DialOption{
grpc.WithChainUnaryInterceptor(metadata.UnaryClientInterceptor, om.UnaryClientInterceptor(grpcMetrics)),
grpc.WithChainStreamInterceptor(metadata.StreamClientInterceptor, om.StreamClientInterceptor(grpcMetrics)),
}...)
}

clt, err := auth.NewClient(apiclient.Config{
Addrs: utils.NetAddrsToStrings(authServers),
Credentials: []apiclient.Credentials{
apiclient.LoadTLS(tlsConfig),
},
DialOpts: dialOpts,
}, cltParams...)
if err != nil {
return nil, trace.Wrap(err)
Expand Down
2 changes: 2 additions & 0 deletions lib/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -1365,6 +1365,7 @@ func (process *TeleportProcess) initAuthService() error {
return trace.Wrap(err)
}
go mux.Serve()
authMetrics := &auth.Metrics{GRPCServerLatency: cfg.Metrics.GRPCServerLatency}
tlsServer, err := auth.NewTLSServer(auth.TLSServerConfig{
TLS: tlsConfig,
APIConfig: *apiConf,
Expand All @@ -1373,6 +1374,7 @@ func (process *TeleportProcess) initAuthService() error {
Component: teleport.Component(teleport.ComponentAuth, process.id),
ID: process.id,
Listener: mux.TLS(),
Metrics: authMetrics,
})
if err != nil {
return trace.Wrap(err)
Expand Down
30 changes: 30 additions & 0 deletions lib/utils/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/gravitational/teleport"
"github.com/gravitational/trace"
om "github.com/grpc-ecosystem/go-grpc-middleware/providers/openmetrics/v2"

"github.com/prometheus/client_golang/prometheus"
)
Expand Down Expand Up @@ -57,3 +58,32 @@ func BuildCollector() prometheus.Collector {
func() float64 { return 1 },
)
}

// CreateGRPCServerMetrics builds the gRPC server metrics configuration for use
// in an openmetrics unary and/or stream server interceptor. The counters always
// carry the supplied labels as constant labels; when latencyEnabled is true a
// handling-time histogram (see grpcHistogramOpts) is enabled as well.
//
// The returned metrics are not registered here; registering them with
// prometheus is left to the caller.
func CreateGRPCServerMetrics(latencyEnabled bool, labels prometheus.Labels) *om.ServerMetrics {
	counterOpt := om.WithServerCounterOptions(om.WithConstLabels(labels))
	if !latencyEnabled {
		return om.NewServerMetrics(counterOpt)
	}
	histogramOpt := om.WithServerHandlingTimeHistogram(grpcHistogramOpts(labels)...)
	return om.NewServerMetrics(counterOpt, histogramOpt)
}

// CreateGRPCClientMetrics builds the gRPC client metrics configuration for use
// in an openmetrics unary and/or stream client interceptor. The counters always
// carry the supplied labels as constant labels; when latencyEnabled is true a
// handling-time histogram (see grpcHistogramOpts) is enabled as well.
//
// The returned metrics are not registered here; registering them with
// prometheus is left to the caller.
func CreateGRPCClientMetrics(latencyEnabled bool, labels prometheus.Labels) *om.ClientMetrics {
	counterOpt := om.WithClientCounterOptions(om.WithConstLabels(labels))
	if !latencyEnabled {
		return om.NewClientMetrics(counterOpt)
	}
	histogramOpt := om.WithClientHandlingTimeHistogram(grpcHistogramOpts(labels)...)
	return om.NewClientMetrics(counterOpt, histogramOpt)
}

// grpcHistogramOpts returns the histogram options shared by the gRPC server
// and client latency metrics: 16 exponential buckets that start at 0.001 and
// double at each step, with the given labels attached as constant labels.
func grpcHistogramOpts(labels prometheus.Labels) []om.HistogramOption {
	buckets := prometheus.ExponentialBuckets(0.001, 2, 16)
	return []om.HistogramOption{
		om.WithHistogramBuckets(buckets),
		om.WithHistogramConstLabels(labels),
	}
}
6 changes: 6 additions & 0 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,4 +230,10 @@ const (

// TagType is a prometheus label for type of resource or tunnel connected
TagType = "type"

// TagServer is a prometheus label to indicate what server the metric is tied to
TagServer = "server"

// TagClient is a prometheus label to indicate what client the metric is tied to
TagClient = "client"
)

0 comments on commit 4bba628

Please sign in to comment.