Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[management] Refactor HTTP metrics #2476

Merged
merged 4 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions management/server/account.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,12 @@ func (a *Account) GetPeerNetworkMap(
objectCount := int64(len(peersToConnect) + len(expiredPeers) + len(routesUpdate) + len(firewallRules))
metrics.CountNetworkMapObjects(objectCount)
metrics.CountGetPeerNetworkMapDuration(time.Since(start))

if objectCount > 5000 {
log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects, "+
"peers to connect: %d, expired peers: %d, routes: %d, firewall rules: %d",
a.Id, objectCount, len(peersToConnect), len(expiredPeers), len(routesUpdate), len(firewallRules))
}
}

return nm
Expand Down
21 changes: 0 additions & 21 deletions management/server/http/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,27 +100,6 @@ func APIHandler(ctx context.Context, accountManager s.AccountManager, LocationMa
api.addPostureCheckEndpoint()
api.addLocationsEndpoint()

err := api.Router.Walk(func(route *mux.Route, _ *mux.Router, _ []*mux.Route) error {
methods, err := route.GetMethods()
if err != nil { // we may have wildcard routes from integrations without methods, skip them for now
methods = []string{}
}
for _, method := range methods {
template, err := route.GetPathTemplate()
if err != nil {
return err
}
err = metricsMiddleware.AddHTTPRequestResponseCounter(template, method)
if err != nil {
return err
}
}
return nil
})
if err != nil {
return nil, err
}

return rootRouter, nil
}

Expand Down
13 changes: 13 additions & 0 deletions management/server/sql_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ func (s *SqlStore) AcquireReadLockByUID(ctx context.Context, uniqueID string) (u

func (s *SqlStore) SaveAccount(ctx context.Context, account *Account) error {
start := time.Now()
defer func() {
elapsed := time.Since(start)
if elapsed > 1*time.Second {
log.WithContext(ctx).Tracef("SaveAccount for account %s exceeded 1s, took: %v", account.Id, elapsed)
}
}()

// todo: remove this check after the issue is resolved
s.checkAccountDomainBeforeSave(ctx, account.Id, account.Domain)
Expand Down Expand Up @@ -513,6 +519,13 @@ func (s *SqlStore) GetAllAccounts(ctx context.Context) (all []*Account) {
}

func (s *SqlStore) GetAccount(ctx context.Context, accountID string) (*Account, error) {
start := time.Now()
defer func() {
elapsed := time.Since(start)
if elapsed > 1*time.Second {
log.WithContext(ctx).Tracef("GetAccount for account %s exceeded 1s, took: %v", accountID, elapsed)
}
}()

var account Account
result := s.db.Model(&account).
Expand Down
126 changes: 48 additions & 78 deletions management/server/telemetry/http_api_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/google/uuid"
"github.com/gorilla/mux"
log "github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
Expand Down Expand Up @@ -54,112 +55,89 @@ func (rw *WrappedResponseWriter) WriteHeader(code int) {
// HTTPMiddleware handler used to collect metrics of every request/response coming to the API.
// Also adds request tracing (logging).
type HTTPMiddleware struct {
meter metric.Meter
ctx context.Context
ctx context.Context
// all HTTP requests by endpoint & method
httpRequestCounters map[string]metric.Int64Counter
httpRequestCounter metric.Int64Counter
// all HTTP responses by endpoint & method & status code
httpResponseCounters map[string]metric.Int64Counter
httpResponseCounter metric.Int64Counter
// all HTTP requests
totalHTTPRequestsCounter metric.Int64Counter
// all HTTP responses
totalHTTPResponseCounter metric.Int64Counter
// all HTTP responses by status code
totalHTTPResponseCodeCounters map[int]metric.Int64Counter
totalHTTPResponseCodeCounter metric.Int64Counter
// all HTTP requests durations by endpoint and method
httpRequestDurations map[string]metric.Int64Histogram
httpRequestDuration metric.Int64Histogram
// all HTTP requests durations
totalHTTPRequestDuration metric.Int64Histogram
}

// NewMetricsMiddleware creates a new HTTPMiddleware
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpRequestCounterPrefix), metric.WithUnit("1"))
httpRequestCounter, err := meter.Int64Counter(httpRequestCounterPrefix, metric.WithUnit("1"))
if err != nil {
return nil, err
}

totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s_total", httpResponseCounterPrefix), metric.WithUnit("1"))
httpResponseCounter, err := meter.Int64Counter(httpResponseCounterPrefix, metric.WithUnit("1"))
if err != nil {
return nil, err
}

totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s_total", httpRequestDurationPrefix), metric.WithUnit("milliseconds"))
totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpRequestCounterPrefix), metric.WithUnit("1"))
if err != nil {
return nil, err
}

return &HTTPMiddleware{
ctx: ctx,
httpRequestCounters: map[string]metric.Int64Counter{},
httpResponseCounters: map[string]metric.Int64Counter{},
httpRequestDurations: map[string]metric.Int64Histogram{},
totalHTTPResponseCodeCounters: map[int]metric.Int64Counter{},
meter: meter,
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
totalHTTPResponseCounter: totalHTTPResponseCounter,
totalHTTPRequestDuration: totalHTTPRequestDuration,
},
nil
}
totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpResponseCounterPrefix), metric.WithUnit("1"))
if err != nil {
return nil, err
}

// AddHTTPRequestResponseCounter adds a new meter for an HTTP defaultEndpoint and Method (GET, POST, etc)
// Creates one request counter and multiple response counters (one per http response status code).
func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method string) error {
meterKey := getRequestCounterKey(endpoint, method)
httpReqCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
totalHTTPResponseCodeCounter, err := meter.Int64Counter(fmt.Sprintf("%s.code.total", httpResponseCounterPrefix), metric.WithUnit("1"))
if err != nil {
return err
return nil, err
}
m.httpRequestCounters[meterKey] = httpReqCounter

durationKey := getRequestDurationKey(endpoint, method)
requestDuration, err := m.meter.Int64Histogram(durationKey, metric.WithUnit("milliseconds"))
httpRequestDuration, err := meter.Int64Histogram(httpRequestDurationPrefix, metric.WithUnit("milliseconds"))
if err != nil {
return err
return nil, err
}
m.httpRequestDurations[durationKey] = requestDuration

respCodes := []int{200, 204, 400, 401, 403, 404, 500, 502, 503}
for _, code := range respCodes {
meterKey = getResponseCounterKey(endpoint, method, code)
httpRespCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
if err != nil {
return err
}
m.httpResponseCounters[meterKey] = httpRespCounter

meterKey = fmt.Sprintf("%s_%d_total", httpResponseCounterPrefix, code)
totalHTTPResponseCodeCounter, err := m.meter.Int64Counter(meterKey, metric.WithUnit("1"))
if err != nil {
return err
}
m.totalHTTPResponseCodeCounters[code] = totalHTTPResponseCodeCounter
totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s.total", httpRequestDurationPrefix), metric.WithUnit("milliseconds"))
if err != nil {
return nil, err
}

return nil
return &HTTPMiddleware{
ctx: ctx,
httpRequestCounter: httpRequestCounter,
httpResponseCounter: httpResponseCounter,
httpRequestDuration: httpRequestDuration,
totalHTTPResponseCodeCounter: totalHTTPResponseCodeCounter,
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
totalHTTPResponseCounter: totalHTTPResponseCounter,
totalHTTPRequestDuration: totalHTTPRequestDuration,
},
nil
}

func replaceEndpointChars(endpoint string) string {
endpoint = strings.ReplaceAll(endpoint, "/", "_")
endpoint = strings.ReplaceAll(endpoint, "{", "")
endpoint = strings.ReplaceAll(endpoint, "}", "")
return endpoint
}

func getRequestCounterKey(endpoint, method string) string {
endpoint = replaceEndpointChars(endpoint)
return fmt.Sprintf("%s%s_%s", httpRequestCounterPrefix, endpoint, method)
}

func getRequestDurationKey(endpoint, method string) string {
endpoint = replaceEndpointChars(endpoint)
return fmt.Sprintf("%s%s_%s", httpRequestDurationPrefix, endpoint, method)
}

func getResponseCounterKey(endpoint, method string, status int) string {
endpoint = replaceEndpointChars(endpoint)
return fmt.Sprintf("%s%s_%s_%d", httpResponseCounterPrefix, endpoint, method, status)
func getEndpointMetricAttr(r *http.Request) string {
var endpoint string
route := mux.CurrentRoute(r)
if route != nil {
pathTmpl, err := route.GetPathTemplate()
if err == nil {
endpoint = replaceEndpointChars(pathTmpl)
}
}
return endpoint
}

// Handler logs every request and response and adds the, to metrics.
Expand All @@ -176,11 +154,10 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {

log.WithContext(ctx).Tracef("HTTP request %v: %v %v", reqID, r.Method, r.URL)

metricKey := getRequestCounterKey(r.URL.Path, r.Method)
endpointAttr := attribute.String("endpoint", getEndpointMetricAttr(r))
methodAttr := attribute.String("method", r.Method)

if c, ok := m.httpRequestCounters[metricKey]; ok {
c.Add(m.ctx, 1)
}
m.httpRequestCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr))
m.totalHTTPRequestsCounter.Add(m.ctx, 1)

w := WrapResponseWriter(rw)
Expand All @@ -193,21 +170,14 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
log.WithContext(ctx).Tracef("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status())
}

metricKey = getResponseCounterKey(r.URL.Path, r.Method, w.Status())
if c, ok := m.httpResponseCounters[metricKey]; ok {
c.Add(m.ctx, 1)
}
statusCodeAttr := attribute.Int("code", w.Status())

m.httpResponseCounter.Add(m.ctx, 1, metric.WithAttributes(endpointAttr, methodAttr, statusCodeAttr))
m.totalHTTPResponseCounter.Add(m.ctx, 1)
if c, ok := m.totalHTTPResponseCodeCounters[w.Status()]; ok {
c.Add(m.ctx, 1)
}
m.totalHTTPResponseCodeCounter.Add(m.ctx, 1, metric.WithAttributes(statusCodeAttr))

durationKey := getRequestDurationKey(r.URL.Path, r.Method)
reqTook := time.Since(reqStart)
if c, ok := m.httpRequestDurations[durationKey]; ok {
c.Record(m.ctx, reqTook.Milliseconds())
}
m.httpRequestDuration.Record(m.ctx, reqTook.Milliseconds(), metric.WithAttributes(endpointAttr, methodAttr))
log.WithContext(ctx).Debugf("request %s %s took %d ms and finished with status %d", r.Method, r.URL.Path, reqTook.Milliseconds(), w.Status())

if w.Status() == 200 && (r.Method == http.MethodPut || r.Method == http.MethodPost || r.Method == http.MethodDelete) {
Expand Down
Loading