Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BCF-3250]: Fix FilterLog error handling in LogPoller #11654

Merged
merged 7 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/poor-pumas-occur.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"chainlink": minor
---

#bugfix More robust error handling in LogPoller, including no more misleading CRITICAL errors emitted under non-critical conditions
105 changes: 104 additions & 1 deletion core/chains/evm/client/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/rpc"
pkgerrors "github.com/pkg/errors"

"github.com/smartcontractkit/chainlink-common/pkg/logger"
Expand Down Expand Up @@ -62,6 +63,7 @@ const (
Fatal
ServiceUnavailable
TerminallyStuck
TooManyResults
)

type ClientErrors map[int]*regexp.Regexp
Expand Down Expand Up @@ -298,6 +300,7 @@ func ClientErrorRegexes(errsRegex config.ClientErrors) *ClientErrors {
TransactionAlreadyMined: regexp.MustCompile(errsRegex.TransactionAlreadyMined()),
Fatal: regexp.MustCompile(errsRegex.Fatal()),
ServiceUnavailable: regexp.MustCompile(errsRegex.ServiceUnavailable()),
TooManyResults: regexp.MustCompile(errsRegex.TooManyResults()),
}
}

Expand Down Expand Up @@ -457,6 +460,11 @@ func isFatalSendError(err error) bool {
return false
}

// Compile-time assertions that JsonError satisfies go-ethereum's rpc.Error and
// rpc.DataError interfaces, so it can be classified by code/data inspection
// (e.g. in IsTooManyResults).
var (
	_ rpc.Error     = JsonError{}
	_ rpc.DataError = JsonError{}
)

// [email protected]/rpc/json.go
type JsonError struct {
Code int `json:"code"`
Expand All @@ -471,7 +479,17 @@ func (err JsonError) Error() string {
return err.Message
}

func (err *JsonError) String() string {
// ErrorCode returns the JSON-RPC error code. It satisfies go-ethereum's
// rpc.Error interface.
func (err JsonError) ErrorCode() int {
	return err.Code
}

// ErrorData returns the JSON-RPC error's optional data payload. It satisfies
// go-ethereum's rpc.DataError interface.
func (err JsonError) ErrorData() interface{} {
	return err.Data
}

// String renders the full JSON-RPC error (code, message, and data) in a
// human-readable form for logging.
func (err JsonError) String() string {
	return fmt.Sprintf("json-rpc error { Code = %d, Message = '%s', Data = '%v' }", err.Code, err.Message, err.Data)
}

Expand Down Expand Up @@ -610,3 +628,88 @@ func ClassifySendError(err error, clientErrors config.ClientErrors, lggr logger.
lggr.Criticalw("Unknown error encountered when sending transaction", "err", err, "etx", tx)
return commonclient.Unknown
}

// Provider-specific patterns matching the error messages each RPC vendor
// returns when an eth_getLogs request would yield too many results. These are
// consulted by IsTooManyResults after dispatching on the JSON-RPC error code.

// Infura: sent with code -32005 (jsonRpcLimitExceeded); the message suggests a
// narrower block range to retry with.
var infura = ClientErrors{
	TooManyResults: regexp.MustCompile(`(: |^)query returned more than [0-9]+ results. Try with this block range \[0x[0-9A-F]+, 0x[0-9A-F]+\].$`),
}

// Alchemy: sent with code -32602 (jsonRpcInvalidParams); the message suggests
// a narrower block range to retry with.
var alchemy = ClientErrors{
	TooManyResults: regexp.MustCompile(`(: |^)Log response size exceeded. You can make eth_getLogs requests with up to a [0-9A-Z]+ block range and no limit on the response size, or you can request any block range with a cap of [0-9A-Z]+ logs in the response. Based on your parameters and the response size limit, this block range should work: \[0x[0-9a-f]+, 0x[0-9a-f]+\]$`),
}

// Quicknode: sent with the undocumented code -32614.
var quicknode = ClientErrors{
	TooManyResults: regexp.MustCompile(`(: |^)eth_getLogs is limited to a [0-9,]+ range$`),
}

// SimplyVC: sent with the generic server-error code -32000.
var simplyvc = ClientErrors{
	TooManyResults: regexp.MustCompile(`too wide blocks range, the limit is [0-9,]+$`),
}

// Drpc: also sent with the generic server-error code -32000.
var drpc = ClientErrors{
	TooManyResults: regexp.MustCompile(`(: |^)requested too many blocks from [0-9]+ to [0-9]+, maximum is set to [0-9,]+$`),
}

// Linkpool, Blockdaemon, and Chainstack all return "request timed out" if the log results are too large for them to process
var defaultClient = ClientErrors{
	TooManyResults: regexp.MustCompile(`request timed out`),
}

// JSON-RPC error codes which can indicate a refusal of the server to process
// an eth_getLogs request because the result set is too large. Codes that are
// ambiguous (shared with unrelated failures) are further disambiguated by the
// provider-specific message regexes above.
const (
	jsonRpcServerError = -32000 // Server error. SimplyVC uses this error code when too many results are returned

	// Server timeout. When the rpc server has its own limit on how long it can take to compile the results
	// Examples: Linkpool, Chainstack, Block Daemon
	jsonRpcTimedOut = -32002

	// See: https://github.com/ethereum/go-ethereum/blob/master/rpc/errors.go#L63
	// Can occur if the rpc server is configured with a maximum byte limit on the response size of batch requests
	jsonRpcResponseTooLarge = -32003

	// Not implemented in geth by default, but is defined in EIP 1474 and implemented by infura and some other 3rd party rpc servers
	// See: https://community.infura.io/t/getlogs-error-query-returned-more-than-1000-results/358/5
	jsonRpcLimitExceeded = -32005 // See also: https://github.com/ethereum/EIPs/blob/master/EIPS/eip-1474.md

	jsonRpcInvalidParams = -32602 // Invalid method params. Returned by alchemy if the block range is too large or there are too many results to return

	jsonRpcQuicknodeTooManyResults = -32614 // Undocumented error code used by Quicknode for too many results error
)

// IsTooManyResults reports whether err is a JSON-RPC error indicating that the
// node refused an eth_getLogs request because the result set was too large.
// Chain-specific patterns from TOML config are checked first, then known
// provider responses keyed by JSON-RPC error code.
func IsTooManyResults(err error, clientErrors config.ClientErrors) bool {
	var rpcErr rpc.Error
	if !pkgerrors.As(err, &rpcErr) {
		// Not a JSON-RPC error at all (e.g. an HTML error page or transport
		// failure) — we cannot classify it.
		return false
	}

	// Operator-supplied overrides take precedence over built-in patterns.
	if ClientErrorRegexes(clientErrors).ErrIs(rpcErr, TooManyResults) {
		return true
	}

	switch rpcErr.ErrorCode() {
	case jsonRpcResponseTooLarge:
		// Unambiguous: the response itself exceeded a size limit.
		return true
	case jsonRpcLimitExceeded:
		// Ambiguous code (also used for rate limiting); confirm via message.
		return infura.ErrIs(rpcErr, TooManyResults)
	case jsonRpcInvalidParams:
		return alchemy.ErrIs(rpcErr, TooManyResults)
	case jsonRpcQuicknodeTooManyResults:
		return quicknode.ErrIs(rpcErr, TooManyResults)
	case jsonRpcTimedOut:
		return defaultClient.ErrIs(rpcErr, TooManyResults)
	case jsonRpcServerError:
		// Generic server-error code shared by several providers.
		return simplyvc.ErrIs(rpcErr, TooManyResults) || drpc.ErrIs(rpcErr, TooManyResults)
	default:
		return false
	}
}
85 changes: 85 additions & 0 deletions core/chains/evm/client/errors_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package client_test

import (
"encoding/json"
"errors"
"fmt"
"testing"

pkgerrors "github.com/pkg/errors"
Expand Down Expand Up @@ -439,3 +441,86 @@ func Test_Config_Errors(t *testing.T) {
assert.False(t, clientErrors.ErrIs(errors.New("some old bollocks"), evmclient.NonceTooLow))
})
}

// Test_IsTooManyResultsError verifies that IsTooManyResults recognizes the
// "too many results" / block-range-too-wide responses returned by a variety of
// RPC providers, and rejects unrelated or unparseable errors. It also checks
// that custom patterns supplied via TOML config are honored.
func Test_IsTooManyResultsError(t *testing.T) {
	customErrors := evmclient.NewTestClientErrors()

	tests := []errorCase{
		{`{
		"code":-32602,
		"message":"Log response size exceeded. You can make eth_getLogs requests with up to a 2K block range and no limit on the response size, or you can request any block range with a cap of 10K logs in the response. Based on your parameters and the response size limit, this block range should work: [0x0, 0x133e71]"}`,
			true,
			"alchemy",
		}, {`{
		"code":-32005,
		"data":{"from":"0xCB3D","limit":10000,"to":"0x7B737"},
		"message":"query returned more than 10000 results. Try with this block range [0xCB3D, 0x7B737]."}`,
			true,
			"infura",
		}, {`{
		"code":-32002,
		"message":"request timed out"}`,
			true,
			"LinkPool-Blockdaemon-Chainstack",
		}, {`{
		"code":-32614,
		"message":"eth_getLogs is limited to a 10,000 range"}`,
			true,
			"Quicknode",
		}, {`{
		"code":-32000,
		"message":"too wide blocks range, the limit is 100"}`,
			true,
			"SimplyVC",
		}, {`{
		"message":"requested too many blocks from 0 to 16777216, maximum is set to 2048",
		"code":-32000}`,
			true,
			"Drpc",
		},

		// Not JSON at all: some providers (e.g. Nirvana Labs) return a raw
		// HTML error page. This isn't an error response we can handle, but
		// it must never be classified as a too-many-results error.
		{`
<!DOCTYPE html>
<html>
  <head>
    <title>503 Backend fetch failed</title>
  </head>
  <body>
    <h1>Error 503 Backend fetch failed</h1>
    <p>Backend fetch failed</p>
    <h3>Guru Meditation:</h3>
    <p>XID: 343710611</p>
    <hr>
    <p>Varnish cache server</p>
  </body>
</html>`,
			false,
			"Nirvana Labs"},

		// NOTE: previously this payload contained a stray '"' after -32000,
		// making it invalid JSON, so it only exercised the unmarshal-failure
		// path instead of an unrelated -32000 server error as intended.
		{`{
		"code":-32000,
		"message":"unrelated server error"}`,
			false,
			"any",
		}, {`{
		"code":-32500,
		"message":"unrelated error code"}`,
			false,
			"any2",
		}, {fmt.Sprintf(`{
		"code" : -43106,
		"message" : "%s"}`, customErrors.TooManyResults()),
			true,
			"custom chain with error specified in toml config",
		},
	}

	for _, test := range tests {
		t.Run(test.network, func(t *testing.T) {
			// Mirror client behavior: if the payload parses as a JSON-RPC
			// error, classify the structured error; otherwise classify the
			// unmarshal error itself (which must not match).
			jsonRpcErr := evmclient.JsonError{}
			err := json.Unmarshal([]byte(test.message), &jsonRpcErr)
			if err == nil {
				err = jsonRpcErr
			}
			assert.Equal(t, test.expect, evmclient.IsTooManyResults(err, &customErrors))
		})
	}
}
3 changes: 3 additions & 0 deletions core/chains/evm/client/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type TestClientErrors struct {
transactionAlreadyMined string
fatal string
serviceUnavailable string
tooManyResults string
}

func NewTestClientErrors() TestClientErrors {
Expand All @@ -52,6 +53,7 @@ func NewTestClientErrors() TestClientErrors {
transactionAlreadyMined: "client error transaction already mined",
fatal: "client error fatal",
serviceUnavailable: "client error service unavailable",
tooManyResults: "client error too many results",
}
}

Expand All @@ -77,6 +79,7 @@ func (c *TestClientErrors) L2Full() string { return c.l2Full }
func (c *TestClientErrors) TransactionAlreadyMined() string { return c.transactionAlreadyMined }
func (c *TestClientErrors) Fatal() string { return c.fatal }
func (c *TestClientErrors) ServiceUnavailable() string { return c.serviceUnavailable }
func (c *TestClientErrors) TooManyResults() string { return c.serviceUnavailable }

type TestNodePoolConfig struct {
NodePollFailureThreshold uint32
Expand Down
4 changes: 2 additions & 2 deletions core/chains/evm/client/simulated_backend_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ func (c *SimulatedBackendClient) CallContract(ctx context.Context, msg ethereum.
res, err := c.b.CallContract(ctx, msg, blockNumber)
if err != nil {
dataErr := revertError{}
if errors.Is(err, &dataErr) {
if errors.As(err, &dataErr) {
return nil, &JsonError{Data: dataErr.ErrorData(), Message: dataErr.Error(), Code: 3}
}
// Generic revert, no data
Expand All @@ -434,7 +434,7 @@ func (c *SimulatedBackendClient) PendingCallContract(ctx context.Context, msg et
res, err := c.b.PendingCallContract(ctx, msg)
if err != nil {
dataErr := revertError{}
if errors.Is(err, &dataErr) {
if errors.As(err, &dataErr) {
return nil, &JsonError{Data: dataErr.ErrorData(), Message: dataErr.Error(), Code: 3}
}
// Generic revert, no data
Expand Down
1 change: 1 addition & 0 deletions core/chains/evm/config/chain_scoped_client_errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ func (c *clientErrorsConfig) Fatal() string { return derefOrDefault(c.c.Fatal) }
func (c *clientErrorsConfig) ServiceUnavailable() string {
return derefOrDefault(c.c.ServiceUnavailable)
}
func (c *clientErrorsConfig) TooManyResults() string { return derefOrDefault(c.c.TooManyResults) }
1 change: 1 addition & 0 deletions core/chains/evm/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ type ClientErrors interface {
TransactionAlreadyMined() string
Fatal() string
ServiceUnavailable() string
TooManyResults() string
}

type Transactions interface {
Expand Down
2 changes: 2 additions & 0 deletions core/chains/evm/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ func TestClientErrorsConfig(t *testing.T) {
TransactionAlreadyMined: ptr("client error transaction already mined"),
Fatal: ptr("client error fatal"),
ServiceUnavailable: ptr("client error service unavailable"),
TooManyResults: ptr("client error too many results"),
},
}
})
Expand All @@ -372,6 +373,7 @@ func TestClientErrorsConfig(t *testing.T) {
assert.Equal(t, "client error transaction already mined", errors.TransactionAlreadyMined())
assert.Equal(t, "client error fatal", errors.Fatal())
assert.Equal(t, "client error service unavailable", errors.ServiceUnavailable())
assert.Equal(t, "client error too many results", errors.TooManyResults())
})
}

Expand Down
4 changes: 4 additions & 0 deletions core/chains/evm/config/toml/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,7 @@ type ClientErrors struct {
TransactionAlreadyMined *string `toml:",omitempty"`
Fatal *string `toml:",omitempty"`
ServiceUnavailable *string `toml:",omitempty"`
TooManyResults *string `toml:",omitempty"`
}

func (r *ClientErrors) setFrom(f *ClientErrors) bool {
Expand Down Expand Up @@ -849,6 +850,9 @@ func (r *ClientErrors) setFrom(f *ClientErrors) bool {
if v := f.ServiceUnavailable; v != nil {
r.ServiceUnavailable = v
}
if v := f.TooManyResults; v != nil {
r.TooManyResults = v
}
return true
}

Expand Down
16 changes: 8 additions & 8 deletions core/chains/evm/logpoller/log_poller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/smartcontractkit/chainlink-common/pkg/utils/mathutil"

"github.com/smartcontractkit/chainlink/v2/core/chains/evm/client"
"github.com/smartcontractkit/chainlink/v2/core/chains/evm/config"
evmtypes "github.com/smartcontractkit/chainlink/v2/core/chains/evm/types"
ubig "github.com/smartcontractkit/chainlink/v2/core/chains/evm/utils/big"
)
Expand Down Expand Up @@ -113,6 +114,7 @@ type logPoller struct {
backfillBatchSize int64 // batch size to use when backfilling finalized logs
rpcBatchSize int64 // batch size to use for fallback RPC calls made in GetBlocks
logPrunePageSize int64
clientErrors config.ClientErrors
backupPollerNextBlock int64 // next block to be processed by Backup LogPoller
backupPollerBlockDelay int64 // how far behind regular LogPoller should BackupLogPoller run. 0 = disabled

Expand Down Expand Up @@ -143,6 +145,7 @@ type Opts struct {
KeepFinalizedBlocksDepth int64
BackupPollerBlockDelay int64
LogPrunePageSize int64
ClientErrors config.ClientErrors
}

// NewLogPoller creates a log poller. Note there is an assumption
Expand Down Expand Up @@ -172,6 +175,7 @@ func NewLogPoller(orm ORM, ec Client, lggr logger.Logger, headTracker HeadTracke
rpcBatchSize: opts.RpcBatchSize,
keepFinalizedBlocksDepth: opts.KeepFinalizedBlocksDepth,
logPrunePageSize: opts.LogPrunePageSize,
clientErrors: opts.ClientErrors,
filters: make(map[string]Filter),
filterDirty: true, // Always build Filter on first call to cache an empty filter if nothing registered yet.
finalityViolated: new(atomic.Bool),
Expand Down Expand Up @@ -794,8 +798,6 @@ func (lp *logPoller) blocksFromLogs(ctx context.Context, logs []types.Log, endBl
return lp.GetBlocksRange(ctx, numbers)
}

const jsonRpcLimitExceeded = -32005 // See https://github.com/ethereum/EIPs/blob/master/EIPS/eip-1474.md

// backfill will query FilterLogs in batches for logs in the
// block range [start, end] and save them to the db.
// Retries until ctx cancelled. Will return an error if cancelled
Expand All @@ -807,13 +809,11 @@ func (lp *logPoller) backfill(ctx context.Context, start, end int64) error {

gethLogs, err := lp.ec.FilterLogs(ctx, lp.Filter(big.NewInt(from), big.NewInt(to), nil))
if err != nil {
var rpcErr client.JsonError
if pkgerrors.As(err, &rpcErr) {
if rpcErr.Code != jsonRpcLimitExceeded {
lp.lggr.Errorw("Unable to query for logs", "err", err, "from", from, "to", to)
return err
}
if !client.IsTooManyResults(err, lp.clientErrors) {
lp.lggr.Errorw("Unable to query for logs", "err", err, "from", from, "to", to)
return err
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the logic below makes sense in the case of LimitExceeded. Why group them together?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic was designed specifically for the LimitExceeded error, which is returned whenever the number of results that would have come back from an eth_getLogs request exceeds a limit set by the rpc server. (In my testing, this was found to be 10,000 for most 3rd party rpc servers, such as alchemy and infura.)

That one has been in the PR since Dec, I just added this other one (ResultsTooLarge) because the name of it sounds like it might get returned by some rpc servers under similar circumstances--we have no actual evidence that this is the case, I'm just guessing on that because the name sounds like what LimitExceeded does in practice. We can leave this one out, but we can't leave out LimitExceeded since that's what this error handling was added for.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the current comment for LimitExceeded accurate? Rate limiting is significantly different from result size limiting.

Copy link
Contributor Author

@reductionista reductionista Aug 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It had been a while since I tested this, so I just went through and tested the responses using curl on a dozen or more different rpc servers.
Sadly, they are much more varied than I'd remembered (although in Dec I think I'd only tried it on 3 providers)--we may have to rely primarily on string parsing 😩, or at least use that in conjunction with error codes.

Here is the response infura gives, which is the LimitExceeded error:

{"jsonrpc":"2.0","id":1,"error":{"code":-32005,"data":{"from":"0xCB3D","limit":10000,"to":"0x7B737"},"message":"query returned more than 10000 results. Try with this block range [0xCB3D, 0x7B737]."}}

Looking at their docs, they also return the same error code if you've exceeded your quota for the day, or are sending requests too fast. So the comment is accurate, but you're right that they are lumping very different types of errors together under the same error code--I guess we would need to check either "data" or "message" to differentiate it from other types of rate limiting.

Some other examples of responses (I was either mistaken about Alchemy, or they changed it since then).

Alchemy:

{"jsonrpc":"2.0","id":1,"error":{"code":-32602,"message":"Log response size exceeded. You can make eth_getLogs requests with up to a 2K block range and no limit on the response size, or you can request any block range with a cap of 10K logs in the response. Based on your parameters and the response size limit, this block range should work: [0x0, 0x133e71]"}}

(from their docs, -32602 means "invalid method parameters")

LinkPool, Blockdaemon, Chainstack:

{"jsonrpc":"2.0","id":1,"error":{"code":-32002,"message":"request timed out"}}

Quicknode BSC, Fantom:

{"jsonrpc":"2.0","id":1,"error":{"code":-32614,"message":"eth_getLogs is limited to a 10,000 range"}}

(their docs list a number of custom error codes, up to and including -32613 but don't mention -32614 !)

SimplyVC Fantom:

{"jsonrpc":"2.0","id":1,"error":{"code":-32000,"message":"too wide blocks range, the limit is 100"}}

Drpc Avalanche:

{"id":1,"jsonrpc":"2.0","error":{"message":"requested too many blocks from 0 to 16777216, maximum is set to 2048","code":-32000}}

Nirvana Labs:
(after pause for 60+ seconds... )

<!DOCTYPE html>
<html>
  <head>
    <title>503 Backend fetch failed</title>
  </head>
  <body>
    <h1>Error 503 Backend fetch failed</h1>
    <p>Backend fetch failed</p>
    <h3>Guru Meditation:</h3>
    <p>XID: 343710611</p>
    <hr>
    <p>Varnish cache server</p>
  </body>
</html>

(I double checked that the Nirvana Labs endpoint is valid, and responds to an eth_getLogs request with a smaller block range properly--very bad behavior on their part!)

if batchSize == 1 {
lp.lggr.Criticalw("Too many log results in a single block, failed to retrieve logs! Node may be running in a degraded state.", "err", err, "from", from, "to", to, "LogBackfillBatchSize", lp.backfillBatchSize)
return err
Expand Down
Loading
Loading