Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC: Flight recording of slow requests (above 500ms) #216

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ Ref: https://keepachangelog.com/en/1.0.0/

# Changelog

## Unreleased

- Flight recording of slow requests (above 500ms) and dependency updates

## v0.17.11

- Fix Astroport PCL spot price bug - failure to utilize token out denom for quote estimate in edge cases
Expand Down
2 changes: 1 addition & 1 deletion app/sidecar_query_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (sqs *sideCarQueryServer) Start(context.Context) error {
func NewSideCarQueryServer(appCodec codec.Codec, config domain.Config, logger log.Logger) (SideCarQueryServer, error) {
// Setup echo server
e := echo.New()
middleware := middleware.InitMiddleware(config.CORS)
middleware := middleware.InitMiddleware(config.CORS, logger)
e.Use(middleware.CORS)
e.Use(middleware.InstrumentMiddleware)
e.Use(middleware.TraceWithParamsMiddleware("sqs"))
Expand Down
22 changes: 9 additions & 13 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,16 @@ require (
go.opentelemetry.io/otel/trace v1.25.0
go.uber.org/goleak v1.3.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/crypto v0.21.0 // indirect
golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.17.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.18.0 // indirect
golang.org/x/tools v0.21.0 // indirect
google.golang.org/api v0.162.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de // indirect
Expand Down Expand Up @@ -250,8 +250,7 @@ replace (

// Our cosmos-sdk branch is: https://github.com/osmosis-labs/cosmos-sdk/tree/osmo-v22/v0.47.5, current branch: osmo-v22/v0.47.5. Direct commit link: https://github.com/osmosis-labs/cosmos-sdk/commit/cf358f6fc20cabfb5cb8ce75fc10b2623150869e
// https://github.com/osmosis-labs/cosmos-sdk/releases/tag/v0.47.5-v22-osmo-2
github.com/cosmos/cosmos-sdk => github.com/osmosis-labs/cosmos-sdk v0.47.5-v24-osmo-5
github.com/cosmos/gogoproto => github.com/cosmos/gogoproto v1.4.10
github.com/cosmos/cosmos-sdk => github.com/osmosis-labs/cosmos-sdk v0.47.6-0.20240511221818-474a846bc488
github.com/cosmos/iavl => github.com/cosmos/iavl v1.1.2-0.20240405172238-7f92c6b356ac
github.com/gogo/protobuf => github.com/regen-network/protobuf v1.3.3-alpha.regen.1

Expand All @@ -262,9 +261,6 @@ replace (

// replace as directed by sdk upgrading.md https://github.com/cosmos/cosmos-sdk/blob/393de266c8675dc16cc037c1a15011b1e990975f/UPGRADING.md?plain=1#L713
github.com/syndtr/goleveldb => github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7

// newer versions of exp treat sorting differently, which is incompatible with the current version of cosmos-sdk
golang.org/x/exp => golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb
)

exclude github.com/cosmos/cosmos-sdk v0.50.1
Expand Down
647 changes: 53 additions & 594 deletions go.sum

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions go.work
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
go 1.21.4
go 1.22

toolchain go1.21.9
toolchain go1.22.3

use .

Expand Down
Binary file added go1.22.3.linux-amd64.tar.gz
Binary file not shown.
64 changes: 61 additions & 3 deletions middleware/middleware.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
package middleware

import (
"bytes"
"context"
"fmt"
"os"
"sync"

"time"

"go.uber.org/zap"
gotrace "golang.org/x/exp/trace"

"github.com/labstack/echo/v4"
"github.com/osmosis-labs/sqs/domain"
"github.com/osmosis-labs/sqs/log"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
Expand All @@ -16,8 +25,16 @@ import (
// GoMiddleware represent the data-struct for middleware
type GoMiddleware struct {
corsConfig domain.CORSConfig
logger log.Logger
}

const (
// Name of the flight recorder trace
flightRecorderTraceName = "flight-trace.out"
// The latency threshold in milliseconds to record traces
flightRecorderThresholdMs = 500
)

var (
// total number of requests counter
requestsTotal = prometheus.NewCounterVec(
Expand All @@ -37,6 +54,9 @@ var (
},
[]string{"method", "endpoint"},
)

// flight recorder
recordFlightOnce sync.Once
)

func init() {
Expand All @@ -55,14 +75,22 @@ func (m *GoMiddleware) CORS(next echo.HandlerFunc) echo.HandlerFunc {
}

// InitMiddleware initialize the middleware
func InitMiddleware(corsConfig *domain.CORSConfig) *GoMiddleware {
func InitMiddleware(corsConfig *domain.CORSConfig, logger log.Logger) *GoMiddleware {
return &GoMiddleware{
corsConfig: *corsConfig,
logger: logger,
}
}

// InstrumentMiddleware will handle the instrumentation middleware
func (m *GoMiddleware) InstrumentMiddleware(next echo.HandlerFunc) echo.HandlerFunc {
// Set up the flight recorder.
fr := gotrace.NewFlightRecorder()
err := fr.Start()
if err != nil {
m.logger.Error("failed to start flight recorder", zap.Error(err))
}

return func(c echo.Context) error {
start := time.Now()

Expand All @@ -83,10 +111,40 @@ func (m *GoMiddleware) InstrumentMiddleware(next echo.HandlerFunc) echo.HandlerF

err = next(c)

duration := time.Since(start).Seconds()
duration := time.Since(start)

// Observe the duration with the histogram
requestLatency.WithLabelValues(requestMethod, requestPath).Observe(duration)
requestLatency.WithLabelValues(requestMethod, requestPath).Observe(duration.Seconds())

// Record outliers to the flight recorder for further analysis
if duration > flightRecorderThresholdMs*time.Millisecond {
recordFlightOnce.Do(func() {
// Note: we skip error handling since we don't want to interrupt the request handling
// with tracing errors.

// Grab the snapshot.
var b bytes.Buffer
_, err = fr.WriteTo(&b)
if err != nil {
m.logger.Error("failed to write trace to buffer", zap.Error(err))
return
}

// Write it to a file.
err = os.WriteFile(flightRecorderTraceName, b.Bytes(), 0o755)
if err != nil {
m.logger.Error("failed to write trace to file", zap.Error(err))
return
}

err = fr.Stop()
if err != nil {
fmt.Println("failed to stop flight recorder: ", err)
m.logger.Error("failed to stop fligt recorder", zap.Error(err))
return
}
})
}

return err
}
Expand Down
Binary file added perf.data
Binary file not shown.
Binary file added perf.data.old
Binary file not shown.
Loading
Loading