Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
Enhancement: Session Error handling and caching (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
nodiesBlade authored Mar 30, 2024
1 parent 49f1696 commit 2d3605a
Show file tree
Hide file tree
Showing 26 changed files with 353 additions and 918 deletions.
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,10 @@
/*.sqlite

## environment vars
.env
.env

## docker compose file
docker-compose.yml

## MAC
.DS_Store
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ To onboard the gateway server without having to dig deep, you can follow the [Qu
Every release candidate is published to https://github.com/pokt-network/gateway-server/pkgs/container/pocket-gateway-server

## Docker Compose
There is an all-inclusive docker-compose file available for development [docker-compose.yml](docker-compose.yml)
There is an all-inclusive docker-compose file available for development [docker-compose.yml](docker-compose.yml.sample)

## Minimum Hardware Requirements to run
- 1GB of RAM
Expand Down
30 changes: 16 additions & 14 deletions cmd/gateway_server/internal/config/dot_env_config_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,45 +87,47 @@ func (c DotEnvGlobalConfigProvider) GetAltruistRequestTimeout() time.Duration {
func NewDotEnvConfigProvider() *DotEnvGlobalConfigProvider {
_ = godotenv.Load()

poktRPCTimeout, err := time.ParseDuration(getEnvVar(poktRPCTimeoutEnv))
poktRPCTimeout, err := time.ParseDuration(getEnvVar(poktRPCTimeoutEnv, ""))
if err != nil {
panic(fmt.Sprintf("Error parsing %s: %s", poktRPCTimeoutEnv, err))
}

httpServerPort, err := strconv.ParseUint(getEnvVar(httpServerPortEnv), 10, 64)
httpServerPort, err := strconv.ParseUint(getEnvVar(httpServerPortEnv, ""), 10, 64)
if err != nil {
panic(fmt.Sprintf("Error parsing %s: %s", httpServerPortEnv, err))
}

sessionCacheTTLDuration, err := time.ParseDuration(getEnvVar(sessionCacheTTLEnv))
sessionCacheTTLDuration, err := time.ParseDuration(getEnvVar(sessionCacheTTLEnv, ""))
if err != nil {
panic(fmt.Sprintf("Error parsing %s: %s", sessionCacheTTLDuration, err))
}

altruistRequestTimeoutDuration, err := time.ParseDuration(getEnvVar(altruistRequestTimeoutEnv))
altruistRequestTimeoutDuration, err := time.ParseDuration(getEnvVar(altruistRequestTimeoutEnv, defaultAltruistRequestTimeout.String()))
if err != nil {
// Provide a default to prevent any breaking changes with new env variable.
altruistRequestTimeoutDuration = defaultAltruistRequestTimeout
}

return &DotEnvGlobalConfigProvider{
poktRPCFullHost: getEnvVar(poktRPCFullHostEnv),
poktRPCFullHost: getEnvVar(poktRPCFullHostEnv, ""),
httpServerPort: uint(httpServerPort),
poktRPCRequestTimeout: poktRPCTimeout,
sessionCacheTTL: sessionCacheTTLDuration,
databaseConnectionUrl: getEnvVar(dbConnectionUrlEnv),
environmentStage: global_config.EnvironmentStage(getEnvVar(environmentStageEnv)),
poktApplicationsEncryptionKey: getEnvVar(poktApplicationsEncryptionKeyEnv),
apiKey: getEnvVar(apiKey),
databaseConnectionUrl: getEnvVar(dbConnectionUrlEnv, ""),
environmentStage: global_config.EnvironmentStage(getEnvVar(environmentStageEnv, "")),
poktApplicationsEncryptionKey: getEnvVar(poktApplicationsEncryptionKeyEnv, ""),
apiKey: getEnvVar(apiKey, ""),
altruistRequestTimeout: altruistRequestTimeoutDuration,
}
}

// getEnvVar retrieves the value of the environment variable with error handling.
func getEnvVar(name string) string {
value, exists := os.LookupEnv(name)
if !exists {
panic(fmt.Errorf("%s not set", name))
func getEnvVar(name string, defaultValue string) string {
if value, exists := os.LookupEnv(name); exists {
return value
}
return value
if defaultValue != "" {
return defaultValue
}
panic(fmt.Errorf("%s not set", name))
}
1 change: 1 addition & 0 deletions cmd/gateway_server/internal/models/qos_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ type PublicQosNode struct {
IsHeathy bool `json:"is_heathy"`
IsSynced bool `json:"is_synced"`
LastKnownHeight uint64 `json:"last_known_height"`
P90Latency float64 `json:"p90_latency"`
}
10 changes: 8 additions & 2 deletions cmd/gateway_server/internal/transform/qos_node.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
package transform

import (
"math"
"pokt_gateway_server/cmd/gateway_server/internal/models"
internal_model "pokt_gateway_server/internal/node_selector_service/models"
)

func ToPublicQosNode(node *internal_model.QosNode) *models.PublicQosNode {
latency := node.LatencyTracker.GetP90Latency()
if math.IsNaN(latency) {
latency = 0.0
}
return &models.PublicQosNode{
ServiceUrl: node.MorseNode.ServiceUrl,
Chain: node.GetChain(),
SessionHeight: node.PocketSession.SessionHeader.SessionHeight,
AppPublicKey: node.AppSigner.PublicKey,
SessionHeight: node.MorseSession.SessionHeader.SessionHeight,
AppPublicKey: node.MorseSigner.PublicKey,
TimeoutReason: string(node.GetTimeoutReason()),
LastKnownErr: node.GetLastKnownErrorStr(),
IsHeathy: node.IsHealthy(),
IsSynced: node.IsSynced(),
LastKnownHeight: node.GetLastKnownHeight(),
TimeoutUntil: node.GetTimeoutUntil(),
P90Latency: latency,
}
}
4 changes: 2 additions & 2 deletions cmd/gateway_server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ func main() {
ttlcache.WithTTL[string, *session_registry.Session](gatewayConfigProvider.GetSessionCacheTTL()),
)

nodeCache := ttlcache.New[string, []*qos_models.QosNode](
ttlcache.WithTTL[string, []*qos_models.QosNode](gatewayConfigProvider.GetSessionCacheTTL()),
nodeCache := ttlcache.New[qos_models.SessionChainKey, []*qos_models.QosNode](
ttlcache.WithTTL[qos_models.SessionChainKey, []*qos_models.QosNode](gatewayConfigProvider.GetSessionCacheTTL()),
)

poktApplicationRegistry := apps_registry.NewCachedAppsRegistry(client, querier, gatewayConfigProvider, logger.Named("pokt_application_registry"))
Expand Down
33 changes: 0 additions & 33 deletions docker-compose.yml

This file was deleted.

42 changes: 42 additions & 0 deletions docker-compose.yml.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
version: '3'

services:
gateway-server:
build: .
volumes:
- .env:/app/.env
ports:
- "${HTTP_SERVER_PORT}:${HTTP_SERVER_PORT}"
env_file:
- ./.env
networks:
- bridged_network
# depends_on:
# - postgres
restart: on-failure
logging:
driver: json-file
options:
max-size: 10m

# this postgres database is only to be used for testing. It should not be used in production systems
# Leverage a production ready postgres database with HA/replicas in prod.
# postgres:
# image: postgres:latest
# environment:
# POSTGRES_DB: postgres
# POSTGRES_USER: myuser
# POSTGRES_PASSWORD: mypassword
# ports:
# - "5433:5432"
# volumes:
# - postgres_data:/var/lib/postgresql/data
# networks:
# - bridged_network

volumes:
postgres_data:

networks:
bridged_network:
driver: bridge
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ require (
github.com/golang/protobuf v1.5.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/influxdata/tdigest v0.0.1 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
github.com/jackc/pgio v1.0.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
Expand Down
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgj
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
Expand All @@ -55,6 +56,8 @@ github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/influxdata/tdigest v0.0.1 h1:XpFptwYmnEKUqmkcDjrzffswZ3nvNeevbUSLPP/ZzIY=
github.com/influxdata/tdigest v0.0.1/go.mod h1:Z0kXnxzbTC2qrx4NaIzYkE1k66+6oEDQTvL95hQFh5Y=
github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8=
Expand Down Expand Up @@ -227,6 +230,7 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
Expand Down Expand Up @@ -279,6 +283,7 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
Expand All @@ -297,8 +302,10 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ=
gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo=
gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func SendRelaysAsync(relayer pokt_v0.PocketRelayer, nodes []*models.QosNode, pay
Payload: &relayer_models.Payload{Data: payload, Method: method},
Chain: node.GetChain(),
SelectedNodePubKey: node.GetPublicKey(),
Session: node.PocketSession,
Session: node.MorseSession,
})
relayResponses <- &nodeRelayResponse{
Node: node,
Expand Down
38 changes: 21 additions & 17 deletions internal/node_selector_service/checks/error_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,23 @@ const timeoutErrorPenalty = time.Second * 15
// 24 hours is analogous to indefinite
const kickOutSessionPenalty = time.Hour * 24

const (
errPocketInvalidServicerMsg = "failed to find correct servicer PK"
errPocketInvalidBlockHeightMsg = "the block height passed is invalid"
errPocketRequestTimeoutMsg = "request timeout"
errPocketOverServiceMsg = "the max number of relays serviced for this node is exceeded"
errPocketMaximumEvidenceSealedMsg = "the evidence is sealed, either max relays reached or claim already submitted"
var (
errsKickSession = []string{"failed to find correct servicer PK", "the max number of relays serviced for this node is exceeded", "the evidence is sealed, either max relays reached or claim already submitted"}
errsTimeout = []string{"connection refused", "the request block height is out of sync with the current block height", "no route to host", "unexpected EOF", "i/o timeout", "tls: failed to verify certificate", "no such host", "the block height passed is invalid", "request timeout"}
)

const (
errHttpSSLExpired = "tls: failed to verify certificate"
errHttpNoSuchHostMsg = "no such host"
)
func doesErrorContains(errsSubString []string, err error) bool {
if err == nil {
return false
}
errStr := err.Error()
for _, errSubString := range errsSubString {
if strings.Contains(errStr, errSubString) {
return true
}
}
return false
}

// isKickableSessionErr - determines if a node should be kicked from a session to send relays
func isKickableSessionErr(err error) bool {
Expand All @@ -36,23 +41,22 @@ func isKickableSessionErr(err error) bool {
}
// Fallback in the event the error is not parsed correctly due to node operator configurations / custom clients, resort to a simple string check
// node runner cannot serve with expired ssl
if err != nil && (strings.Contains(err.Error(), errHttpSSLExpired) || strings.Contains(err.Error(), errPocketOverServiceMsg) || strings.Contains(err.Error(), errPocketMaximumEvidenceSealedMsg) || strings.Contains(err.Error(), errPocketInvalidServicerMsg)) {
return true
}
return false
return doesErrorContains(errsKickSession, err)
}

func isTimeoutError(err error) bool {
// If Invalid block height, pocket is not caught up to latest session
if err == relayer_models.ErrPocketCoreInvalidBlockHeight {
return true
}

// Check if pocket error returns 500
pocketError, ok := err.(relayer_models.PocketRPCError)
if ok {
return pocketError.HttpCode >= 500
if ok && pocketError.HttpCode >= 500 {
return true
}
// Fallback in the event the error is not parsed correctly due to node operator configurations / custom clients, resort to a simple string check
return err == fasthttp.ErrTimeout || err == fasthttp.ErrDialTimeout || err == fasthttp.ErrTLSHandshakeTimeout || err != nil && (strings.Contains(err.Error(), errHttpNoSuchHostMsg) || strings.Contains(err.Error(), errPocketRequestTimeoutMsg) || strings.Contains(err.Error(), errPocketInvalidBlockHeightMsg))
return err == fasthttp.ErrConnectionClosed || err == fasthttp.ErrTimeout || err == fasthttp.ErrDialTimeout || err == fasthttp.ErrTLSHandshakeTimeout || doesErrorContains(errsTimeout, err)
}

// DefaultPunishNode generic punisher for whenever a node returns an error independent of a specific check
Expand Down
Loading

0 comments on commit 2d3605a

Please sign in to comment.