Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics and additional logging for Data Sources #5404

Merged
merged 5 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion internal/datasources/rest/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@ import (
"io"
"net/http"
"strings"
"sync"
"time"

"github.com/cenkalti/backoff/v4"
"github.com/rs/zerolog"
"github.com/santhosh-tekuri/jsonschema/v6"
uritemplate "github.com/std-uritemplate/std-uritemplate/go/v2"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"google.golang.org/protobuf/types/known/structpb"

"github.com/mindersec/minder/internal/engine/eval/rego"
Expand All @@ -32,6 +37,12 @@ const (
MaxBytesLimit int64 = 1 << 20
)

var (
metricsInit sync.Once

dataSourceLatencyHistogram metric.Int64Histogram
)

type restHandler struct {
rawInputSchema *structpb.Struct
inputSchema *jsonschema.Schema
Expand All @@ -48,6 +59,21 @@ type restHandler struct {
// TODO implement auth
}

func initMetrics() {
metricsInit.Do(func() {
meter := otel.Meter("minder")
var err error
dataSourceLatencyHistogram, err = meter.Int64Histogram(
"datasource.rest.latency",
metric.WithDescription("Latency of data source requests in milliseconds"),
Comment on lines +66 to +68
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Record the unit with the metric:

Suggested change
dataSourceLatencyHistogram, err = meter.Int64Histogram(
"datasource.rest.latency",
metric.WithDescription("Latency of data source requests in milliseconds"),
dataSourceLatencyHistogram, err = meter.Int64Histogram(
"datasource.rest.latency",
metric.WithDescription("Latency of data source requests in milliseconds"),
metric.WithUnit("ms"),

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, added

metric.WithUnit("ms"),
)
if err != nil {
zerolog.Ctx(context.Background()).Warn().Err(err).Msg("Creating histogram for data source requests failed")
}
})
}

func newHandlerFromDef(def *minderv1.RestDataSource_Def) (*restHandler, error) {
if def == nil {
return nil, errors.New("rest data source handler definition is nil")
Expand All @@ -61,6 +87,8 @@ func newHandlerFromDef(def *minderv1.RestDataSource_Def) (*restHandler, error) {

bodyFromInput, body := parseRequestBodyConfig(def)

initMetrics()

return &restHandler{
rawInputSchema: def.GetInputSchema(),
inputSchema: schema,
Expand Down Expand Up @@ -141,14 +169,26 @@ func (h *restHandler) Call(ctx context.Context, _ *interfaces.Result, args any)
return h.doRequest(cli, req)
}

func recordMetrics(ctx context.Context, resp *http.Response, start time.Time) {
attrs := []attribute.KeyValue{
attribute.String("method", resp.Request.Method),
attribute.String("endpoint", resp.Request.URL.String()),
attribute.String("status_code", fmt.Sprintf("%d", resp.StatusCode)),
}

dataSourceLatencyHistogram.Record(ctx, time.Since(start).Milliseconds(), metric.WithAttributes(attrs...))
}

func (h *restHandler) doRequest(cli *http.Client, req *http.Request) (any, error) {
start := time.Now()
resp, err := retriableDo(cli, req)
if err != nil {
return nil, err
}

defer resp.Body.Close()

recordMetrics(req.Context(), resp, start)

bout, err := h.parseResponseBody(resp.Body)
if err != nil {
return nil, err
Expand Down Expand Up @@ -257,21 +297,36 @@ func buildRestOutput(statusCode int, body any) any {

func retriableDo(cli *http.Client, req *http.Request) (*http.Response, error) {
var resp *http.Response
retryCount := 0

err := backoff.Retry(func() error {
var err error
resp, err = cli.Do(req)
if err != nil {
zerolog.Ctx(req.Context()).Debug().
Err(err).
Int("retry", retryCount).
Msg("HTTP request failed, retrying")
retryCount++
return err
}

if resp.StatusCode == http.StatusTooManyRequests {
zerolog.Ctx(req.Context()).Debug().
Int("retry", retryCount).
Msg("rate limited, retrying")
retryCount++
return errors.New("rate limited")
}

return nil
}, backoff.WithMaxRetries(backoff.NewExponentialBackOff(), 5))

if err != nil {
zerolog.Ctx(req.Context()).Warn().
Err(err).
Int("retries", retryCount).
Msg("HTTP request failed after retries")
return nil, err
}

Expand Down
2 changes: 2 additions & 0 deletions internal/datasources/rest/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ func Test_restHandler_Call(t *testing.T) {
parse: tt.fields.parse,
testOnlyTransport: http.DefaultTransport,
}
initMetrics()

got, err := h.Call(context.Background(), nil, tt.args.args)
if tt.wantErr {
assert.Error(t, err)
Expand Down