From 02e90697c4050b115a7a6846987920d58294cfd0 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Fri, 15 Nov 2024 11:09:13 -0600 Subject: [PATCH 01/45] Added rate limiting. Signed-off-by: Cody Littley --- relay/config.go | 26 +++-- relay/limiter/blob_rate_limiter.go | 93 ++++++++++++++++++ relay/limiter/chunk_rate_limiter.go | 143 ++++++++++++++++++++++++++++ relay/limiter/config.go | 58 +++++++++++ relay/server.go | 47 ++++++++- 5 files changed, 355 insertions(+), 12 deletions(-) create mode 100644 relay/limiter/blob_rate_limiter.go create mode 100644 relay/limiter/chunk_rate_limiter.go create mode 100644 relay/limiter/config.go diff --git a/relay/config.go b/relay/config.go index c44349190e..7d33c43323 100644 --- a/relay/config.go +++ b/relay/config.go @@ -1,6 +1,9 @@ package relay -import core "github.com/Layr-Labs/eigenda/core/v2" +import ( + core "github.com/Layr-Labs/eigenda/core/v2" + "github.com/Layr-Labs/eigenda/relay/limiter" +) // Config is the configuration for the relay Server. type Config struct { @@ -29,16 +32,25 @@ type Config struct { // ChunkMaxConcurrency is the size of the work pool for fetching chunks. Default is 32. Note that this does not // impact concurrency utilized by the s3 client to upload/download fragmented files. ChunkMaxConcurrency int + + // MaxKeysPerGetChunksRequest is the maximum number of keys that can be requested in a single GetChunks request. + // Default is 1024. // TODO should this be the max batch size? What is that? + MaxKeysPerGetChunksRequest int + + // RateLimits contains configuration for rate limiting. + RateLimits limiter.Config } // DefaultConfig returns the default configuration for the relay Server. func DefaultConfig() *Config { return &Config{ - MetadataCacheSize: 1024 * 1024, - MetadataMaxConcurrency: 32, - BlobCacheSize: 32, - BlobMaxConcurrency: 32, - ChunkCacheSize: 32, - ChunkMaxConcurrency: 32, + MetadataCacheSize: 1024 * 1024, + MetadataMaxConcurrency: 32, + BlobCacheSize: 32, + BlobMaxConcurrency: 32, + ChunkCacheSize: 32, + ChunkMaxConcurrency: 32, + MaxKeysPerGetChunksRequest: 1024, + RateLimits: *limiter.DefaultConfig(), } } diff --git a/relay/limiter/blob_rate_limiter.go b/relay/limiter/blob_rate_limiter.go new file mode 100644 index 0000000000..8c56e5e036 --- /dev/null +++ b/relay/limiter/blob_rate_limiter.go @@ -0,0 +1,93 @@ +package limiter + +import ( + "fmt" + "golang.org/x/time/rate" + "golang.org/x/tools/container/intsets" + "sync/atomic" + "time" +) + +// TODO test + +// BlobRateLimiter enforces rate limits on GetBlob operations. +type BlobRateLimiter struct { + + // config is the rate limit configuration. + config *Config + + // opLimiter enforces rate limits on the maximum rate of GetBlob operations + opLimiter *rate.Limiter + + // bandwidthLimiter enforces rate limits on the maximum bandwidth consumed by GetBlob operations. Only the size + // of the blob data is considered, not the size of the entire response. + bandwidthLimiter *rate.Limiter + + // operationsInFlight is the number of GetBlob operations currently in flight. + operationsInFlight atomic.Int64 +} + +func NewBlobRateLimiter(config *Config) *BlobRateLimiter { + globalGetBlobOpLimiter := rate.NewLimiter(rate.Limit(config.MaxGetBlobOpsPerSecond), 1) + + // Burst size is set to MaxInt. This is safe, as the requested size is always a size we've + // determined by reading the blob metadata, which is guaranteed to respect maximum blob size. 
+	globalGetBlobBandwidthLimiter := rate.NewLimiter(rate.Limit(config.MaxGetBlobBytesPerSecond), intsets.MaxInt)
+
+	return &BlobRateLimiter{
+		config:           config,
+		opLimiter:        globalGetBlobOpLimiter,
+		bandwidthLimiter: globalGetBlobBandwidthLimiter,
+	}
+}
+
+// BeginGetBlobOperation should be called when a GetBlob operation is about to begin. If it returns an error,
+// the operation should not be performed. If it does not return an error, FinishGetBlobOperation should be
+// called when the operation completes.
+func (l *BlobRateLimiter) BeginGetBlobOperation(now time.Time) error {
+	if l == nil {
+		// If the rate limiter is nil, do not enforce rate limits.
+		return nil
+	}
+
+	countInFlight := l.operationsInFlight.Add(1)
+	if countInFlight > int64(l.config.MaxConcurrentGetBlobOps) {
+		l.operationsInFlight.Add(-1)
+		return fmt.Errorf("global concurrent request limit exceeded for getBlob operations, try again later")
+	}
+
+	allowed := l.opLimiter.AllowN(now, 1)
+
+	if !allowed {
+		l.operationsInFlight.Add(-1)
+		return fmt.Errorf("global rate limit exceeded for getBlob operations, try again later")
+	}
+	return nil
+}
+
+// FinishGetBlobOperation should be called exactly once for each time BeginGetBlobOperation is called and
+// returns nil.
+func (l *BlobRateLimiter) FinishGetBlobOperation() {
+	if l == nil {
+		// If the rate limiter is nil, do not enforce rate limits.
+		return
+	}
+
+	l.operationsInFlight.Add(-1)
+}
+
+// RequestGetBlobBandwidth should be called when a GetBlob is about to start downloading blob data
+// from S3. It returns an error if there is insufficient bandwidth available. If it returns nil, the
+// operation should proceed.
+func (l *BlobRateLimiter) RequestGetBlobBandwidth(now time.Time, bytes uint32) error {
+	if l == nil {
+		// If the rate limiter is nil, do not enforce rate limits.
+		return nil
+	}
+
+	allowed := l.bandwidthLimiter.AllowN(now, int(bytes))
+	if !allowed {
+		return fmt.Errorf("global rate limit exceeded for getBlob bandwidth, try again later")
+	}
+	return nil
+}
diff --git a/relay/limiter/chunk_rate_limiter.go b/relay/limiter/chunk_rate_limiter.go
new file mode 100644
index 0000000000..d71722b1f9
--- /dev/null
+++ b/relay/limiter/chunk_rate_limiter.go
@@ -0,0 +1,143 @@
+package limiter
+
+import (
+	"fmt"
+	"golang.org/x/time/rate"
+	"golang.org/x/tools/container/intsets"
+	"sync/atomic"
+	"time"
+)
+
+// TODO test
+
+// ChunkRateLimiter enforces rate limits on GetChunk operations.
+type ChunkRateLimiter struct {
+
+	// config is the rate limit configuration.
+	config *Config
+
+	// global limiters
+
+	// globalOpLimiter enforces global rate limits on the maximum rate of GetChunk operations
+	globalOpLimiter *rate.Limiter
+
+	// globalBandwidthLimiter enforces global rate limits on the maximum bandwidth consumed by GetChunk operations.
+	globalBandwidthLimiter *rate.Limiter
+
+	// globalOperationsInFlight is the number of GetChunk operations currently in flight.
+	globalOperationsInFlight atomic.Int64
+
+	// per-client limiters
+
+	// Note: in its current form, these expose a DoS vector, since an attacker can create many client IDs
+	// and force these maps to become arbitrarily large. This will be remedied when authentication
+	// is implemented, as authentication will happen prior to rate limiting.
+ + // perClientOpLimiter enforces per-client rate limits on the maximum rate of GetChunk operations + perClientOpLimiter map[string]*rate.Limiter + + // perClientBandwidthLimiter enforces per-client rate limits on the maximum bandwidth consumed by + // GetChunk operations. + perClientBandwidthLimiter map[string]*rate.Limiter + + // perClientOperationsInFlight is the number of GetChunk operations currently in flight for each client. + perClientOperationsInFlight map[string]*atomic.Int64 +} + +func NewChunkRateLimiter(config *Config) *ChunkRateLimiter { + + globalOpLimiter := rate.NewLimiter(rate.Limit(config.MaxGetChunkOpsPerSecond), 1) + globalBandwidthLimiter := rate.NewLimiter(rate.Limit(config.MaxGetChunkBytesPerSecond), intsets.MaxInt) + + return &ChunkRateLimiter{ + config: config, + globalOpLimiter: globalOpLimiter, + globalBandwidthLimiter: globalBandwidthLimiter, + globalOperationsInFlight: atomic.Int64{}, + perClientOpLimiter: make(map[string]*rate.Limiter), + perClientBandwidthLimiter: make(map[string]*rate.Limiter), + perClientOperationsInFlight: make(map[string]*atomic.Int64), + } +} + +// BeginGetChunkOperation should be called when a GetChunk operation is about to begin. If it returns an error, +// the operation should not be performed. If it does not return an error, FinishGetChunkOperation should be +// called when the operation completes. +func (l *ChunkRateLimiter) BeginGetChunkOperation( + now time.Time, + requesterID string) error { + if l == nil { + // If the rate limiter is nil, do not enforce rate limits. + return nil + } + + countInFlight := l.globalOperationsInFlight.Add(1) + if countInFlight > int64(l.config.MaxConcurrentGetChunkOps) { + l.globalOperationsInFlight.Add(-1) + return fmt.Errorf("global concurrent request limit exceeded for GetChunks operations, try again later") + } + + allowed := l.globalOpLimiter.AllowN(now, 1) + if !allowed { + l.globalOperationsInFlight.Add(-1) + return fmt.Errorf("global rate limit exceeded for GetChunks operations, try again later") + } + + clientInFlightCounter, ok := l.perClientOperationsInFlight[requesterID] + if !ok { + // This is the first time we've seen this client ID. + + l.perClientOperationsInFlight[requesterID] = &atomic.Int64{} + clientInFlightCounter = l.perClientOperationsInFlight[requesterID] + + l.perClientBandwidthLimiter[requesterID] = rate.NewLimiter( + rate.Limit(l.config.MaxGetChunkBytesPerSecond), intsets.MaxInt) + } + + countInFlight = clientInFlightCounter.Add(1) + if countInFlight > int64(l.config.MaxConcurrentGetChunkOpsClient) { + l.globalOperationsInFlight.Add(-1) + clientInFlightCounter.Add(-1) + return fmt.Errorf("client concurrent request limit exceeded for GetChunks") + } + + allowed = l.perClientOpLimiter[requesterID].AllowN(now, 1) + if !allowed { + l.globalOperationsInFlight.Add(-1) + clientInFlightCounter.Add(-1) + return fmt.Errorf("client rate limit exceeded for GetChunks, try again later") + } + + return nil +} + +// FinishGetChunkOperation should be called when a GetChunk operation completes. +func (l *ChunkRateLimiter) FinishGetChunkOperation(requesterID string) { + if l == nil { + return + } + + l.globalOperationsInFlight.Add(-1) + l.perClientOperationsInFlight[requesterID].Add(-1) +} + +// RequestGetChunkBandwidth should be called when a GetChunk is about to start downloading chunk data. +func (l *ChunkRateLimiter) RequestGetChunkBandwidth(now time.Time, requesterID string, bytes int) error { + if l == nil { + // If the rate limiter is nil, do not enforce rate limits. 
+ return nil + } + + allowed := l.globalBandwidthLimiter.AllowN(now, bytes) + if !allowed { + return fmt.Errorf("global rate limit exceeded for GetChunk bandwidth, try again later") + } + + allowed = l.perClientBandwidthLimiter[requesterID].AllowN(now, bytes) + if !allowed { + l.globalBandwidthLimiter.AllowN(now, -bytes) + return fmt.Errorf("client rate limit exceeded for GetChunk bandwidth, try again later") + } + + return nil +} diff --git a/relay/limiter/config.go b/relay/limiter/config.go new file mode 100644 index 0000000000..716d70eeff --- /dev/null +++ b/relay/limiter/config.go @@ -0,0 +1,58 @@ +package limiter + +// Config is the configuration for the relay rate limiting. +type Config struct { + + // Blob rate limiting + + // MaxGetBlobOpsPerSecond is the maximum permitted number of GetBlob operations per second. Default is + // 1024. + MaxGetBlobOpsPerSecond float64 + // MaxGetBlobBytesPerSecond is the maximum bandwidth, in bytes, that GetBlob operations are permitted + // to consume per second. Default is 20MiB/s. + MaxGetBlobBytesPerSecond float64 + // MaxConcurrentGetBlobOps is the maximum number of concurrent GetBlob operations that are permitted. + // This is in addition to the rate limits. Default is 1024. + MaxConcurrentGetBlobOps int + + // Chunk rate limiting + + // MaxGetChunkOpsPerSecond is the maximum permitted number of GetChunk operations per second. Default is + // 1024. + MaxGetChunkOpsPerSecond float64 + // MaxGetChunkBytesPerSecond is the maximum bandwidth, in bytes, that GetChunk operations are permitted + // to consume per second. Default is 20MiB/s. + MaxGetChunkBytesPerSecond float64 + // MaxConcurrentGetChunkOps is the maximum number of concurrent GetChunk operations that are permitted. + // Default is 1024. + MaxConcurrentGetChunkOps int + + // Client rate limiting for GetChunk operations + + // MaxGetChunkOpsPerSecondClient is the maximum permitted number of GetChunk operations per second for a single + // client. Default is 8. + MaxGetChunkOpsPerSecondClient float64 + // MaxGetChunkBytesPerSecondClient is the maximum bandwidth, in bytes, that GetChunk operations are permitted + // to consume per second. Default is 2MiB/s. + MaxGetChunkBytesPerSecondClient float64 + // MaxConcurrentGetChunkOpsClient is the maximum number of concurrent GetChunk operations that are permitted. + // Default is 1. + MaxConcurrentGetChunkOpsClient int +} + +// DefaultConfig returns a default rate limit configuration. +func DefaultConfig() *Config { + return &Config{ + MaxGetBlobOpsPerSecond: 1024, + MaxGetBlobBytesPerSecond: 20 * 1024 * 1024, + MaxConcurrentGetBlobOps: 1024, + + MaxGetChunkOpsPerSecond: 1024, + MaxGetChunkBytesPerSecond: 20 * 1024 * 1024, + MaxConcurrentGetChunkOps: 1024, + + MaxGetChunkOpsPerSecondClient: 8, + MaxGetChunkBytesPerSecondClient: 2 * 1024 * 1024, + MaxConcurrentGetChunkOpsClient: 1, + } +} diff --git a/relay/server.go b/relay/server.go index dc006e43a3..57364f8019 100644 --- a/relay/server.go +++ b/relay/server.go @@ -9,7 +9,9 @@ import ( "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore" "github.com/Layr-Labs/eigenda/encoding" "github.com/Layr-Labs/eigenda/relay/chunkstore" + "github.com/Layr-Labs/eigenda/relay/limiter" "github.com/Layr-Labs/eigensdk-go/logging" + "time" ) var _ pb.RelayServer = &Server{} @@ -18,6 +20,9 @@ var _ pb.RelayServer = &Server{} type Server struct { pb.UnimplementedRelayServer + // config is the configuration for the relay Server. 
+	config *Config
+
 	// metadataProvider encapsulates logic for fetching metadata for blobs.
 	metadataProvider *metadataProvider
 
@@ -26,6 +31,12 @@ type Server struct {
 
 	// chunkProvider encapsulates logic for fetching chunks.
 	chunkProvider *chunkProvider
+
+	// blobRateLimiter enforces rate limits on GetBlob operations.
+	blobRateLimiter *limiter.BlobRateLimiter
+
+	// chunkRateLimiter enforces rate limits on GetChunk operations.
+	chunkRateLimiter *limiter.ChunkRateLimiter
 }
 
 // NewServer creates a new relay Server.
@@ -69,20 +80,26 @@ func NewServer(
 	}
 
 	return &Server{
+		config:           config,
 		metadataProvider: ms,
 		blobProvider:     bs,
 		chunkProvider:    cs,
+		blobRateLimiter:  limiter.NewBlobRateLimiter(&config.RateLimits),
+		chunkRateLimiter: limiter.NewChunkRateLimiter(&config.RateLimits),
 	}, nil
 }
 
 // GetBlob retrieves a blob stored by the relay.
 func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.GetBlobReply, error) {
 
-	// Future work :
-	// - global throttle
-	// - per-connection throttle
+	// Future work:
 	// - timeouts
 
+	err := s.blobRateLimiter.BeginGetBlobOperation(time.Now())
+	if err != nil {
+		return nil, err
+	}
+
 	key, err := v2.BytesToBlobKey(request.BlobKey)
 	if err != nil {
 		return nil, fmt.Errorf("invalid blob key: %w", err)
@@ -99,6 +116,11 @@ func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.G
 		return nil, fmt.Errorf("blob not found")
 	}
 
+	err = s.blobRateLimiter.RequestGetBlobBandwidth(time.Now(), metadata.blobSizeBytes) // TODO make sure this field is populated
+	if err != nil {
+		return nil, err
+	}
+
 	data, err := s.blobProvider.GetBlob(key)
 	if err != nil {
 		return nil, fmt.Errorf("error fetching blob %s: %w", key.Hex(), err)
@@ -116,13 +138,22 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (*
 
 	// Future work:
 	// - authentication
-	// - global throttle
-	// - per-connection throttle
 	// - timeouts
 
 	if len(request.ChunkRequests) <= 0 {
 		return nil, fmt.Errorf("no chunk requests provided")
 	}
+	if len(request.ChunkRequests) > s.config.MaxKeysPerGetChunksRequest {
+		return nil, fmt.Errorf(
+			"too many chunk requests provided, max is %d", s.config.MaxKeysPerGetChunksRequest)
+	}
+
+	clientID := fmt.Sprintf("%d", request.RequesterId) //TODO
+	err := s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID)
+	if err != nil {
+		return nil, err
+	}
+	defer s.chunkRateLimiter.FinishGetChunkOperation(clientID)
 
 	keys := make([]v2.BlobKey, 0, len(request.ChunkRequests))
 
@@ -150,6 +181,12 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (*
 			"error fetching metadata for blob, check if blob exists and is assigned to this relay: %w", err)
 	}
 
+	requiredBandwidth := 0 // TODO calculate this
+	err = s.chunkRateLimiter.RequestGetChunkBandwidth(time.Now(), clientID, requiredBandwidth)
+	if err != nil {
+		return nil, err
+	}
+
 	frames, err := s.chunkProvider.GetFrames(ctx, mMap)
 	if err != nil {
 		return nil, fmt.Errorf("error fetching frames: %w", err)

From dc39c21d8d93a99de334a7d19c2d3ca188d81719 Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Fri, 15 Nov 2024 12:14:04 -0600
Subject: [PATCH 02/45] Properly handle blob sizes.
Signed-off-by: Cody Littley --- relay/metadata_provider.go | 11 ++++++++++- relay/server.go | 28 ++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go index 6a12964e1d..84917bf14e 100644 --- a/relay/metadata_provider.go +++ b/relay/metadata_provider.go @@ -15,6 +15,8 @@ import ( type blobMetadata struct { // the size of the blob in bytes blobSizeBytes uint32 + // the size of each encoded chunk + chunkSizeBytes uint32 // the size of the file containing the encoded chunks totalChunkSizeBytes uint32 // the fragment size used for uploading the encoded chunks @@ -159,8 +161,15 @@ func (m *metadataProvider) fetchMetadata(key v2.BlobKey) (*blobMetadata, error) } } + blobSize := uint32(cert.BlobHeader.BlobCommitments.Length) + chunkSize, err := v2.GetChunkLength(cert.BlobHeader.BlobVersion, blobSize) + if err != nil { + return nil, fmt.Errorf("error getting chunk length: %w", err) + } + metadata := &blobMetadata{ - blobSizeBytes: 0, /* Future work: populate this once it is added to the metadata store */ + blobSizeBytes: blobSize, + chunkSizeBytes: chunkSize, totalChunkSizeBytes: fragmentInfo.TotalChunkSizeBytes, fragmentSizeBytes: fragmentInfo.FragmentSizeBytes, } diff --git a/relay/server.go b/relay/server.go index 57364f8019..d688edd840 100644 --- a/relay/server.go +++ b/relay/server.go @@ -116,7 +116,7 @@ func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.G return nil, fmt.Errorf("blob not found") } - err = s.blobRateLimiter.RequestGetBlobBandwidth(time.Now(), metadata.blobSizeBytes) // TODO make sure this field is populated + err = s.blobRateLimiter.RequestGetBlobBandwidth(time.Now(), metadata.blobSizeBytes) if err != nil { return nil, err } @@ -181,7 +181,7 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* "error fetching metadata for blob, check if blob exists and is assigned to this relay: %w", err) } - requiredBandwidth := 0 // TODO calculate this + requiredBandwidth := computeChunkRequestRequiredBandwidth(request, mMap) err = s.chunkRateLimiter.RequestGetChunkBandwidth(time.Now(), clientID, requiredBandwidth) if err != nil { return nil, err @@ -247,3 +247,27 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* Data: bytesToSend, }, nil } + +// TODO unit test +// computeChunkRequestRequiredBandwidth computes the bandwidth required to fulfill a GetChunks request. +func computeChunkRequestRequiredBandwidth(request *pb.GetChunksRequest, mMap metadataMap) int { + requiredBandwidth := 0 + for _, req := range request.ChunkRequests { + var metadata *blobMetadata + var requestedChunks int + + if req.GetByIndex() != nil { + key := v2.BlobKey(req.GetByIndex().GetBlobKey()) + metadata = mMap[key] + requestedChunks = len(req.GetByIndex().ChunkIndices) + } else { + key := v2.BlobKey(req.GetByRange().GetBlobKey()) + metadata = mMap[key] + requestedChunks = int(req.GetByRange().EndIndex - req.GetByRange().StartIndex) + } + + requiredBandwidth += requestedChunks * int(metadata.chunkSizeBytes) + } + + return requiredBandwidth +} From d99fd3924fa83dee3f1554f607a26e17ff5d546b Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Fri, 15 Nov 2024 12:50:30 -0600 Subject: [PATCH 03/45] Incremental progress. 
Signed-off-by: Cody Littley --- relay/limiter/blob_rate_limiter.go | 12 ++-- relay/limiter/blob_rate_limiter_test.go | 1 + relay/limiter/chunk_rate_limiter.go | 18 ++++-- relay/limiter/config.go | 30 +++++++++ relay/limiter/limiter_test.go | 86 +++++++++++++++++++++++++ 5 files changed, 138 insertions(+), 9 deletions(-) create mode 100644 relay/limiter/blob_rate_limiter_test.go create mode 100644 relay/limiter/limiter_test.go diff --git a/relay/limiter/blob_rate_limiter.go b/relay/limiter/blob_rate_limiter.go index 8c56e5e036..f726a1df1a 100644 --- a/relay/limiter/blob_rate_limiter.go +++ b/relay/limiter/blob_rate_limiter.go @@ -3,7 +3,6 @@ package limiter import ( "fmt" "golang.org/x/time/rate" - "golang.org/x/tools/container/intsets" "sync/atomic" "time" ) @@ -27,12 +26,15 @@ type BlobRateLimiter struct { operationsInFlight atomic.Int64 } +// NewBlobRateLimiter creates a new BlobRateLimiter. func NewBlobRateLimiter(config *Config) *BlobRateLimiter { - globalGetBlobOpLimiter := rate.NewLimiter(rate.Limit(config.MaxGetBlobOpsPerSecond), 1) + globalGetBlobOpLimiter := rate.NewLimiter( + rate.Limit(config.MaxGetBlobOpsPerSecond), + config.GetBlobOpsBurstiness) - // Burst size is set to MaxInt. This is safe, as the requested size is always a size we've - // determined by reading the blob metadata, which is guaranteed to respect maximum blob size. - globalGetBlobBandwidthLimiter := rate.NewLimiter(rate.Limit(config.MaxGetBlobBytesPerSecond), intsets.MaxInt) + globalGetBlobBandwidthLimiter := rate.NewLimiter( + rate.Limit(config.MaxGetBlobBytesPerSecond), + config.GetBlobBytesBurstiness) return &BlobRateLimiter{ config: config, diff --git a/relay/limiter/blob_rate_limiter_test.go b/relay/limiter/blob_rate_limiter_test.go new file mode 100644 index 0000000000..6a935d2caf --- /dev/null +++ b/relay/limiter/blob_rate_limiter_test.go @@ -0,0 +1 @@ +package limiter diff --git a/relay/limiter/chunk_rate_limiter.go b/relay/limiter/chunk_rate_limiter.go index d71722b1f9..d6b520bfa1 100644 --- a/relay/limiter/chunk_rate_limiter.go +++ b/relay/limiter/chunk_rate_limiter.go @@ -3,7 +3,6 @@ package limiter import ( "fmt" "golang.org/x/time/rate" - "golang.org/x/tools/container/intsets" "sync/atomic" "time" ) @@ -44,10 +43,16 @@ type ChunkRateLimiter struct { perClientOperationsInFlight map[string]*atomic.Int64 } +// NewChunkRateLimiter creates a new ChunkRateLimiter. 
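+//
+// A sketch of the expected call pattern, mirroring how server.go uses this type:
+//
+//	err := limiter.BeginGetChunkOperation(time.Now(), clientID)
+//	if err != nil {
+//		return err
+//	}
+//	defer limiter.FinishGetChunkOperation(clientID)
+//	// ... determine requiredBandwidth for the request ...
+//	err = limiter.RequestGetChunkBandwidth(time.Now(), clientID, requiredBandwidth)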
func NewChunkRateLimiter(config *Config) *ChunkRateLimiter {
 
-	globalOpLimiter := rate.NewLimiter(rate.Limit(config.MaxGetChunkOpsPerSecond), 1)
-	globalBandwidthLimiter := rate.NewLimiter(rate.Limit(config.MaxGetChunkBytesPerSecond), intsets.MaxInt)
+	globalOpLimiter := rate.NewLimiter(
+		rate.Limit(config.MaxGetChunkOpsPerSecond),
+		config.GetChunkOpsBurstiness)
+
+	globalBandwidthLimiter := rate.NewLimiter(
+		rate.Limit(config.MaxGetChunkBytesPerSecond),
+		config.GetChunkBytesBurstiness)
 
 	return &ChunkRateLimiter{
 		config:                      config,
@@ -90,8 +95,13 @@ func (l *ChunkRateLimiter) BeginGetChunkOperation(
 		l.perClientOperationsInFlight[requesterID] = &atomic.Int64{}
 		clientInFlightCounter = l.perClientOperationsInFlight[requesterID]
 
+		l.perClientOpLimiter[requesterID] = rate.NewLimiter(
+			rate.Limit(l.config.MaxGetChunkOpsPerSecondClient),
+			l.config.GetChunkOpsBurstinessClient)
+
 		l.perClientBandwidthLimiter[requesterID] = rate.NewLimiter(
-			rate.Limit(l.config.MaxGetChunkBytesPerSecond), intsets.MaxInt)
+			rate.Limit(l.config.MaxGetChunkBytesPerSecond),
+			l.config.GetChunkBytesBurstinessClient)
 	}
 
 	countInFlight = clientInFlightCounter.Add(1)
diff --git a/relay/limiter/config.go b/relay/limiter/config.go
index 716d70eeff..5b33d6ccd9 100644
--- a/relay/limiter/config.go
+++ b/relay/limiter/config.go
@@ -8,9 +8,17 @@ type Config struct {
 	// MaxGetBlobOpsPerSecond is the maximum permitted number of GetBlob operations per second. Default is
 	// 1024.
 	MaxGetBlobOpsPerSecond float64
+	// The burstiness of the MaxGetBlobOpsPerSecond rate limiter. This is the maximum burst size that can happen
+	// within a short time window. Default is 1024.
+	GetBlobOpsBurstiness int
+
 	// MaxGetBlobBytesPerSecond is the maximum bandwidth, in bytes, that GetBlob operations are permitted
 	// to consume per second. Default is 20MiB/s.
 	MaxGetBlobBytesPerSecond float64
+	// The burstiness of the MaxGetBlobBytesPerSecond rate limiter. This is the maximum burst size that can happen
+	// within a short time window. Default is 20MiB.
+	GetBlobBytesBurstiness int
+
 	// MaxConcurrentGetBlobOps is the maximum number of concurrent GetBlob operations that are permitted.
 	// This is in addition to the rate limits. Default is 1024.
 	MaxConcurrentGetBlobOps int
@@ -20,9 +28,17 @@ type Config struct {
 	// MaxGetChunkOpsPerSecond is the maximum permitted number of GetChunk operations per second. Default is
 	// 1024.
 	MaxGetChunkOpsPerSecond float64
+	// The burstiness of the MaxGetChunkOpsPerSecond rate limiter. This is the maximum burst size that can happen
+	// within a short time window. Default is 1024.
+	GetChunkOpsBurstiness int
+
 	// MaxGetChunkBytesPerSecond is the maximum bandwidth, in bytes, that GetChunk operations are permitted
 	// to consume per second. Default is 20MiB/s.
 	MaxGetChunkBytesPerSecond float64
+	// The burstiness of the MaxGetChunkBytesPerSecond rate limiter. This is the maximum burst size that can happen
+	// within a short time window. Default is 20MiB.
+	GetChunkBytesBurstiness int
+
 	// MaxConcurrentGetChunkOps is the maximum number of concurrent GetChunk operations that are permitted.
 	// Default is 1024.
 	MaxConcurrentGetChunkOps int
@@ -32,9 +48,17 @@ type Config struct {
 	// MaxGetChunkOpsPerSecondClient is the maximum permitted number of GetChunk operations per second for a single
 	// client. Default is 8.
 	MaxGetChunkOpsPerSecondClient float64
+	// The burstiness of the MaxGetChunkOpsPerSecondClient rate limiter. This is the maximum burst size that can
+	// happen within a short time window. Default is 8.
+	GetChunkOpsBurstinessClient int
+
 	// MaxGetChunkBytesPerSecondClient is the maximum bandwidth, in bytes, that GetChunk operations are permitted
 	// to consume per second. Default is 2MiB/s.
 	MaxGetChunkBytesPerSecondClient float64
+	// The burstiness of the MaxGetChunkBytesPerSecondClient rate limiter. This is the maximum burst size that can
+	// happen within a short time window. Default is 2MiB.
+	GetChunkBytesBurstinessClient int
+
 	// MaxConcurrentGetChunkOpsClient is the maximum number of concurrent GetChunk operations that are permitted.
 	// Default is 1.
 	MaxConcurrentGetChunkOpsClient int
@@ -44,15 +68,21 @@ func DefaultConfig() *Config {
 	return &Config{
 		MaxGetBlobOpsPerSecond:   1024,
+		GetBlobOpsBurstiness:     1024,
 		MaxGetBlobBytesPerSecond: 20 * 1024 * 1024,
+		GetBlobBytesBurstiness:   20 * 1024 * 1024,
 		MaxConcurrentGetBlobOps:  1024,
 
 		MaxGetChunkOpsPerSecond:   1024,
+		GetChunkOpsBurstiness:     1024,
 		MaxGetChunkBytesPerSecond: 20 * 1024 * 1024,
+		GetChunkBytesBurstiness:   20 * 1024 * 1024,
 		MaxConcurrentGetChunkOps:  1024,
 
 		MaxGetChunkOpsPerSecondClient:   8,
+		GetChunkOpsBurstinessClient:     8,
 		MaxGetChunkBytesPerSecondClient: 2 * 1024 * 1024,
+		GetChunkBytesBurstinessClient:   2 * 1024 * 1024,
 		MaxConcurrentGetChunkOpsClient:  1,
 	}
 }
diff --git a/relay/limiter/limiter_test.go b/relay/limiter/limiter_test.go
new file mode 100644
index 0000000000..6064220f40
--- /dev/null
+++ b/relay/limiter/limiter_test.go
@@ -0,0 +1,86 @@
+package limiter
+
+import (
+	"github.com/stretchr/testify/require"
+	"golang.org/x/time/rate"
+	"testing"
+	"time"
+)
+
+// The rate.Limiter library has less documentation than ideal. Although I can figure out what it's doing by reading
+// the code, I think it's risky to write code that depends on behavior that may change in the future. In these tests,
+// I verify some basic properties of the rate.Limiter library, so that if these properties ever change in the future,
+// the tests will fail and we'll know to update the code.
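+//
+// One property worth highlighting, because RequestGetChunkBandwidth relies on it to refund the global
+// bucket when a per-client check fails: AllowN accepts a negative n, which returns tokens to the bucket
+// instead of consuming them. A minimal sketch (illustrative, not part of this patch's API):
+//
+//	limiter := rate.NewLimiter(rate.Limit(10), 10) // 10 tokens per second, bucket size 10
+//	limiter.AllowN(now, 7)                         // returns true; 3 tokens remain
+//	limiter.AllowN(now, -7)                        // refunds 7 tokens; back to 10 (capped at burst)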
+ +func TestPositiveTokens(t *testing.T) { + configuredRate := rate.Limit(10.0) + // "burst" is equivalent to the bucket size, aka the number of tokens that can be stored + configuredBurst := 10 + + // time starts at current time, but advances manually afterward + now := time.Now() + + rateLimiter := rate.NewLimiter(configuredRate, configuredBurst) + + // number of tokens should equal the burst limit + require.Equal(t, configuredBurst, int(rateLimiter.TokensAt(now))) + + // moving forward in time should not change the number of tokens + now = now.Add(time.Second) + require.Equal(t, configuredBurst, int(rateLimiter.TokensAt(now))) + + // remove each token without advancing time + for i := 0; i < configuredBurst; i++ { + require.True(t, rateLimiter.AllowN(now, 1)) + require.Equal(t, configuredBurst-i-1, int(rateLimiter.TokensAt(now))) + } + require.Equal(t, 0, int(rateLimiter.TokensAt(now))) + + // removing an additional token should fail + require.False(t, rateLimiter.AllowN(now, 1)) + require.Equal(t, 0, int(rateLimiter.TokensAt(now))) + + // tokens should return at a rate of once per 100ms + for i := 0; i < configuredBurst; i++ { + now = now.Add(100 * time.Millisecond) + require.Equal(t, i+1, int(rateLimiter.TokensAt(now))) + } + require.Equal(t, configuredBurst, int(rateLimiter.TokensAt(now))) + + // remove 7 tokens all at once + require.True(t, rateLimiter.AllowN(now, 7)) + require.Equal(t, 3, int(rateLimiter.TokensAt(now))) + + // move forward 500ms, returning 5 tokens + now = now.Add(500 * time.Millisecond) + require.Equal(t, 8, int(rateLimiter.TokensAt(now))) + + // try to take more than the burst limit + require.False(t, rateLimiter.AllowN(now, 100)) +} + +func TestNegativeTokens(t *testing.T) { + configuredRate := rate.Limit(10.0) + // "burst" is equivalent to the bucket size, aka the number of tokens that can be stored + configuredBurst := 10 + + // time starts at current time, but advances manually afterward + now := time.Now() + + rateLimiter := rate.NewLimiter(configuredRate, configuredBurst) + + // number of tokens should equal the burst limit + require.Equal(t, configuredBurst, int(rateLimiter.TokensAt(now))) + + // remove all tokens then add them back + require.True(t, rateLimiter.AllowN(now, configuredBurst)) + require.Equal(t, 0, int(rateLimiter.TokensAt(now))) + for i := 0; i < configuredBurst; i++ { + require.True(t, rateLimiter.AllowN(now, -1)) + require.Equal(t, i+1, int(rateLimiter.TokensAt(now))) + } + + // nothing funky should happen when time advances + now = now.Add(100 * time.Second) + require.Equal(t, configuredBurst, int(rateLimiter.TokensAt(now))) +} From e39eaf9f20339237fee6b642c73816b22825f3d5 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Fri, 15 Nov 2024 13:34:20 -0600 Subject: [PATCH 04/45] Incremental progress. 
Signed-off-by: Cody Littley --- relay/limiter/blob_rate_limiter_test.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/relay/limiter/blob_rate_limiter_test.go b/relay/limiter/blob_rate_limiter_test.go index 6a935d2caf..ba437eda18 100644 --- a/relay/limiter/blob_rate_limiter_test.go +++ b/relay/limiter/blob_rate_limiter_test.go @@ -1 +1,24 @@ package limiter + +import ( + "testing" +) + +func TestConcurrentBlobOperations(t *testing.T) { + + //concurrencyLimit := 10 + // + //config := DefaultConfig() + //config.MaxConcurrentGetBlobOps = concurrencyLimit + // + //limiter := NewBlobRateLimiter(config) + // + //// time starts at current time, but advances manually afterward + //now := time.Now() + // + //// We should be able to start 10 operations concurrently + //for i := 0; i < concurrencyLimit; i++ { + // + //} + +} From 9500f4f8a8a72b03b995257aedb304ac22d9d6f3 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 08:49:11 -0600 Subject: [PATCH 05/45] unit tests Signed-off-by: Cody Littley --- relay/limiter/blob_rate_limiter.go | 2 - relay/limiter/blob_rate_limiter_test.go | 147 ++++++++++++++++++++--- relay/limiter/chunk_rate_limiter_test.go | 1 + 3 files changed, 134 insertions(+), 16 deletions(-) create mode 100644 relay/limiter/chunk_rate_limiter_test.go diff --git a/relay/limiter/blob_rate_limiter.go b/relay/limiter/blob_rate_limiter.go index f726a1df1a..11dc12c7c9 100644 --- a/relay/limiter/blob_rate_limiter.go +++ b/relay/limiter/blob_rate_limiter.go @@ -7,8 +7,6 @@ import ( "time" ) -// TODO test - // BlobRateLimiter enforces rate limits on GetBlob operations. type BlobRateLimiter struct { diff --git a/relay/limiter/blob_rate_limiter_test.go b/relay/limiter/blob_rate_limiter_test.go index ba437eda18..3c6267ea12 100644 --- a/relay/limiter/blob_rate_limiter_test.go +++ b/relay/limiter/blob_rate_limiter_test.go @@ -1,24 +1,143 @@ package limiter import ( + tu "github.com/Layr-Labs/eigenda/common/testutils" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" "testing" + "time" ) func TestConcurrentBlobOperations(t *testing.T) { + tu.InitializeRandom() - //concurrencyLimit := 10 - // - //config := DefaultConfig() - //config.MaxConcurrentGetBlobOps = concurrencyLimit - // - //limiter := NewBlobRateLimiter(config) - // - //// time starts at current time, but advances manually afterward - //now := time.Now() - // - //// We should be able to start 10 operations concurrently - //for i := 0; i < concurrencyLimit; i++ { - // - //} + concurrencyLimit := 1 + rand.Intn(10) + config := DefaultConfig() + config.MaxConcurrentGetBlobOps = concurrencyLimit + // Make the burstiness limit high enough that we won't be rate limited + config.GetBlobOpsBurstiness = concurrencyLimit * 100 + + limiter := NewBlobRateLimiter(config) + + // time starts at current time, but advances manually afterward + now := time.Now() + + // We should be able to start this many operations concurrently + for i := 0; i < concurrencyLimit; i++ { + err := limiter.BeginGetBlobOperation(now) + require.NoError(t, err) + } + + // Starting one more operation should fail due to the concurrency limit + err := limiter.BeginGetBlobOperation(now) + require.Error(t, err) + + // Finish an operation. 
This should permit exactly one more operation to start
+	limiter.FinishGetBlobOperation()
+	err = limiter.BeginGetBlobOperation(now)
+	require.NoError(t, err)
+	err = limiter.BeginGetBlobOperation(now)
+	require.Error(t, err)
+}
+
+func TestGetBlobOpRateLimit(t *testing.T) {
+	tu.InitializeRandom()
+
+	config := DefaultConfig()
+	config.MaxGetBlobOpsPerSecond = float64(2 + rand.Intn(10))
+	config.GetBlobOpsBurstiness = int(config.MaxGetBlobOpsPerSecond) + rand.Intn(10)
+	config.MaxConcurrentGetBlobOps = 1
+
+	limiter := NewBlobRateLimiter(config)
+
+	// time starts at current time, but advances manually afterward
+	now := time.Now()
+
+	// Without advancing time, we should be able to perform a number of operations equal to the burstiness limit.
+	for i := 0; i < config.GetBlobOpsBurstiness; i++ {
+		err := limiter.BeginGetBlobOperation(now)
+		require.NoError(t, err)
+		limiter.FinishGetBlobOperation()
+	}
+
+	// We are now at the rate limit, and should not be able to start another operation.
+	err := limiter.BeginGetBlobOperation(now)
+	require.Error(t, err)
+
+	// Advance time by one second. We should gain a number of tokens equal to the rate limit.
+	now = now.Add(time.Second)
+	for i := 0; i < int(config.MaxGetBlobOpsPerSecond); i++ {
+		err = limiter.BeginGetBlobOperation(now)
+		require.NoError(t, err)
+		limiter.FinishGetBlobOperation()
+	}
+
+	// We have once again hit the rate limit. We should not be able to start another operation.
+	err = limiter.BeginGetBlobOperation(now)
+	require.Error(t, err)
+
+	// Advance time by another second. We should gain another number of tokens equal to the rate limit.
+	// Intentionally do not finish the next operation. We are attempting to get a failure by exceeding
+	// the max concurrent operations limit.
+	now = now.Add(time.Second)
+	err = limiter.BeginGetBlobOperation(now)
+	require.NoError(t, err)
+
+	// This operation should fail since we have limited concurrent operations to 1. It should not count
+	// against the rate limit.
+	err = limiter.BeginGetBlobOperation(now)
+	require.Error(t, err)
+
+	// "finish" the prior operation. Verify that we have all expected tokens available.
+	limiter.FinishGetBlobOperation()
+	for i := 0; i < int(config.MaxGetBlobOpsPerSecond)-1; i++ {
+		err = limiter.BeginGetBlobOperation(now)
+		require.NoError(t, err)
+		limiter.FinishGetBlobOperation()
+	}
+
+	// We should now be at the rate limit. We should not be able to start another operation.
+	err = limiter.BeginGetBlobOperation(now)
+	require.Error(t, err)
+}
+
+func TestGetBlobBandwidthLimit(t *testing.T) {
+	tu.InitializeRandom()
+
+	config := DefaultConfig()
+	config.MaxGetBlobBytesPerSecond = float64(1024 + rand.Intn(1024*1024))
+	config.GetBlobBytesBurstiness = int(config.MaxGetBlobBytesPerSecond) + rand.Intn(1024*1024)
+
+	limiter := NewBlobRateLimiter(config)
+
+	// time starts at current time, but advances manually afterward
+	now := time.Now()
+
+	// Without advancing time, we should be able to utilize a number of bytes equal to the burstiness limit.
+	bytesRemaining := config.GetBlobBytesBurstiness
+	for bytesRemaining > 0 {
+		bytesToRequest := 1 + rand.Intn(bytesRemaining)
+		err := limiter.RequestGetBlobBandwidth(now, uint32(bytesToRequest))
+		require.NoError(t, err)
+		bytesRemaining -= bytesToRequest
+	}
+
+	// Requesting one more byte should fail due to the bandwidth limit
+	err := limiter.RequestGetBlobBandwidth(now, 1)
+	require.Error(t, err)
+
+	// Advance time by one second. We should gain a number of tokens equal to the rate limit.
+ now = now.Add(time.Second) + bytesRemaining = int(config.MaxGetBlobBytesPerSecond) + for bytesRemaining > 0 { + bytesToRequest := 1 + rand.Intn(bytesRemaining) + err = limiter.RequestGetBlobBandwidth(now, uint32(bytesToRequest)) + require.NoError(t, err) + bytesRemaining -= bytesToRequest + } + + // Requesting one more byte should fail due to the bandwidth limit + err = limiter.RequestGetBlobBandwidth(now, 1) + require.Error(t, err) } diff --git a/relay/limiter/chunk_rate_limiter_test.go b/relay/limiter/chunk_rate_limiter_test.go new file mode 100644 index 0000000000..6a935d2caf --- /dev/null +++ b/relay/limiter/chunk_rate_limiter_test.go @@ -0,0 +1 @@ +package limiter From ffe18c625e81aa788996851f26d8abf0628bcbf9 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 10:11:57 -0600 Subject: [PATCH 06/45] Unit tests. Signed-off-by: Cody Littley --- relay/limiter/chunk_rate_limiter.go | 2 +- relay/limiter/chunk_rate_limiter_test.go | 333 +++++++++++++++++++++++ 2 files changed, 334 insertions(+), 1 deletion(-) diff --git a/relay/limiter/chunk_rate_limiter.go b/relay/limiter/chunk_rate_limiter.go index d6b520bfa1..b2249ab5f8 100644 --- a/relay/limiter/chunk_rate_limiter.go +++ b/relay/limiter/chunk_rate_limiter.go @@ -100,7 +100,7 @@ func (l *ChunkRateLimiter) BeginGetChunkOperation( l.config.GetChunkOpsBurstinessClient) l.perClientBandwidthLimiter[requesterID] = rate.NewLimiter( - rate.Limit(l.config.MaxGetChunkBytesPerSecond), + rate.Limit(l.config.MaxGetChunkBytesPerSecondClient), l.config.GetChunkBytesBurstinessClient) } diff --git a/relay/limiter/chunk_rate_limiter_test.go b/relay/limiter/chunk_rate_limiter_test.go index 6a935d2caf..c30349211e 100644 --- a/relay/limiter/chunk_rate_limiter_test.go +++ b/relay/limiter/chunk_rate_limiter_test.go @@ -1 +1,334 @@ package limiter + +import ( + tu "github.com/Layr-Labs/eigenda/common/testutils" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" + "math" + "testing" + "time" +) + +func TestConcurrentGetChunksOperations(t *testing.T) { + tu.InitializeRandom() + + concurrencyLimit := 1 + rand.Intn(10) + + config := DefaultConfig() + config.MaxConcurrentGetChunkOps = concurrencyLimit + config.GetChunkOpsBurstiness = math.MaxInt32 + config.GetChunkOpsBurstinessClient = math.MaxInt32 + + userID := tu.RandomString(64) + + limiter := NewChunkRateLimiter(config) + + // time starts at current time, but advances manually afterward + now := time.Now() + + // We should be able to start this many operations concurrently + for i := 0; i < concurrencyLimit; i++ { + err := limiter.BeginGetChunkOperation(now, userID) + require.NoError(t, err) + } + + // Starting one more operation should fail due to the concurrency limit + err := limiter.BeginGetChunkOperation(now, userID) + require.Error(t, err) + + // Finish an operation. 
This should permit exactly one more operation to start
+	limiter.FinishGetChunkOperation(userID)
+	err = limiter.BeginGetChunkOperation(now, userID)
+	require.NoError(t, err)
+	err = limiter.BeginGetChunkOperation(now, userID)
+	require.Error(t, err)
+}
+
+func TestGetChunksRateLimit(t *testing.T) {
+	tu.InitializeRandom()
+
+	config := DefaultConfig()
+	config.MaxGetChunkOpsPerSecond = float64(2 + rand.Intn(10))
+	config.GetChunkOpsBurstiness = int(config.MaxGetChunkOpsPerSecond) + rand.Intn(10)
+	config.GetChunkOpsBurstinessClient = math.MaxInt32
+	config.MaxConcurrentGetChunkOps = 1
+
+	userID := tu.RandomString(64)
+
+	limiter := NewChunkRateLimiter(config)
+
+	// time starts at current time, but advances manually afterward
+	now := time.Now()
+
+	// Without advancing time, we should be able to perform a number of operations equal to the burstiness limit.
+	for i := 0; i < config.GetChunkOpsBurstiness; i++ {
+		err := limiter.BeginGetChunkOperation(now, userID)
+		require.NoError(t, err)
+		limiter.FinishGetChunkOperation(userID)
+	}
+
+	// We are now at the rate limit, and should not be able to start another operation.
+	err := limiter.BeginGetChunkOperation(now, userID)
+	require.Error(t, err)
+
+	// Advance time by one second. We should now be able to perform a number of operations equal to the rate limit.
+	now = now.Add(time.Second)
+	for i := 0; i < int(config.MaxGetChunkOpsPerSecond); i++ {
+		err = limiter.BeginGetChunkOperation(now, userID)
+		require.NoError(t, err)
+		limiter.FinishGetChunkOperation(userID)
+	}
+
+	// We are now at the rate limit, and should not be able to start another operation.
+	err = limiter.BeginGetChunkOperation(now, userID)
+	require.Error(t, err)
+
+	// Advance time by one second.
+	// Intentionally do not finish the operation. We are attempting to see what happens when an operation fails
+	// due to the limit on parallel operations.
+	now = now.Add(time.Second)
+	err = limiter.BeginGetChunkOperation(now, userID)
+	require.NoError(t, err)
+
+	// This operation will fail due to the concurrency limit. It should not affect the rate limit.
+	err = limiter.BeginGetChunkOperation(now, userID)
+	require.Error(t, err)
+
+	// Finish the operation that was started in the previous second. This should permit the next operation to start.
+	limiter.FinishGetChunkOperation(userID)
+
+	// Verify that we have the expected number of available tokens.
+	for i := 0; i < int(config.MaxGetChunkOpsPerSecond)-1; i++ {
+		err = limiter.BeginGetChunkOperation(now, userID)
+		require.NoError(t, err)
+		limiter.FinishGetChunkOperation(userID)
+	}
+
+	// We are now at the rate limit, and should not be able to start another operation.
+	err = limiter.BeginGetChunkOperation(now, userID)
+	require.Error(t, err)
+}
+
+func TestGetChunksBandwidthLimit(t *testing.T) {
+	tu.InitializeRandom()
+
+	config := DefaultConfig()
+	config.MaxGetChunkBytesPerSecond = float64(1024 + rand.Intn(1024*1024))
+	config.GetChunkBytesBurstiness = int(config.MaxGetChunkBytesPerSecond) + rand.Intn(1024*1024)
+	config.GetChunkBytesBurstinessClient = math.MaxInt32
+
+	userID := tu.RandomString(64)
+
+	limiter := NewChunkRateLimiter(config)
+
+	// time starts at current time, but advances manually afterward
+	now := time.Now()
+
+	// "register" the user ID
+	err := limiter.BeginGetChunkOperation(now, userID)
+	require.NoError(t, err)
+	limiter.FinishGetChunkOperation(userID)
+
+	// Without advancing time, we should be able to utilize a number of bytes equal to the burstiness limit.
+ bytesRemaining := config.GetChunkBytesBurstiness + for bytesRemaining > 0 { + bytesToRequest := 1 + rand.Intn(bytesRemaining) + err = limiter.RequestGetChunkBandwidth(now, userID, bytesToRequest) + require.NoError(t, err) + bytesRemaining -= bytesToRequest + } + + // Requesting one more byte should fail due to the bandwidth limit + err = limiter.RequestGetChunkBandwidth(now, userID, 1) + require.Error(t, err) + + // Advance time by one second. We should gain a number of tokens equal to the rate limit. + now = now.Add(time.Second) + bytesRemaining = int(config.MaxGetChunkBytesPerSecond) + for bytesRemaining > 0 { + bytesToRequest := 1 + rand.Intn(bytesRemaining) + err = limiter.RequestGetChunkBandwidth(now, userID, bytesToRequest) + require.NoError(t, err) + bytesRemaining -= bytesToRequest + } + + // Requesting one more byte should fail due to the bandwidth limit + err = limiter.RequestGetChunkBandwidth(now, userID, 1) + require.Error(t, err) +} + +func TestPerClientConcurrencyLimit(t *testing.T) { + tu.InitializeRandom() + + config := DefaultConfig() + config.MaxConcurrentGetChunkOpsClient = 1 + rand.Intn(10) + config.MaxConcurrentGetChunkOps = 2 * config.MaxConcurrentGetChunkOpsClient + config.GetChunkOpsBurstinessClient = math.MaxInt32 + config.GetChunkOpsBurstiness = math.MaxInt32 + + userID1 := tu.RandomString(64) + userID2 := tu.RandomString(64) + + limiter := NewChunkRateLimiter(config) + + // time starts at current time, but advances manually afterward + now := time.Now() + + // Start the maximum permitted number of operations for user 1 + for i := 0; i < config.MaxConcurrentGetChunkOpsClient; i++ { + err := limiter.BeginGetChunkOperation(now, userID1) + require.NoError(t, err) + } + + // Starting another operation for user 1 should fail due to the concurrency limit + err := limiter.BeginGetChunkOperation(now, userID1) + require.Error(t, err) + + // The failure to start the operation for client 1 should not use up any of the global concurrency slots. + // To verify this, allow the maximum number of operations for client 2 to start. + for i := 0; i < config.MaxConcurrentGetChunkOpsClient; i++ { + err := limiter.BeginGetChunkOperation(now, userID2) + require.NoError(t, err) + } + + // Starting another operation for client 2 should fail due to the concurrency limit + err = limiter.BeginGetChunkOperation(now, userID2) + require.Error(t, err) + + // Ending an operation from client 2 should not affect the concurrency limit for client 1. + limiter.FinishGetChunkOperation(userID2) + err = limiter.BeginGetChunkOperation(now, userID1) + require.Error(t, err) + + // Ending an operation from client 1 should permit another operation for client 1 to start. + limiter.FinishGetChunkOperation(userID1) + err = limiter.BeginGetChunkOperation(now, userID1) + require.NoError(t, err) +} + +func TestOpLimitPerClient(t *testing.T) { + tu.InitializeRandom() + + config := DefaultConfig() + config.MaxGetChunkOpsPerSecondClient = float64(2 + rand.Intn(10)) + config.GetChunkOpsBurstinessClient = int(config.MaxGetChunkOpsPerSecondClient) + rand.Intn(10) + config.GetChunkOpsBurstiness = math.MaxInt32 + + userID1 := tu.RandomString(64) + userID2 := tu.RandomString(64) + + limiter := NewChunkRateLimiter(config) + + // time starts at current time, but advances manually afterward + now := time.Now() + + // Without advancing time, we should be able to perform a number of operations equal to the burstiness limit. 
+	for i := 0; i < config.GetChunkOpsBurstinessClient; i++ {
+		err := limiter.BeginGetChunkOperation(now, userID1)
+		require.NoError(t, err)
+		limiter.FinishGetChunkOperation(userID1)
+	}
+
+	// Client 1 has now exhausted its burstiness limit, and should not be able to start another operation.
+	err := limiter.BeginGetChunkOperation(now, userID1)
+	require.Error(t, err)
+
+	// Client 2 should not be rate limited based on actions by client 1.
+	for i := 0; i < config.GetChunkOpsBurstinessClient; i++ {
+		err := limiter.BeginGetChunkOperation(now, userID2)
+		require.NoError(t, err)
+		limiter.FinishGetChunkOperation(userID2)
+	}
+
+	// Client 2 should now have exhausted its burstiness limit.
+	err = limiter.BeginGetChunkOperation(now, userID2)
+	require.Error(t, err)
+
+	// Advancing time by a second should permit more operations.
+	now = now.Add(time.Second)
+	for i := 0; i < int(config.MaxGetChunkOpsPerSecondClient); i++ {
+		err = limiter.BeginGetChunkOperation(now, userID1)
+		require.NoError(t, err)
+		limiter.FinishGetChunkOperation(userID1)
+		err = limiter.BeginGetChunkOperation(now, userID2)
+		require.NoError(t, err)
+		limiter.FinishGetChunkOperation(userID2)
+	}
+
+	// No more operations should be permitted for either client.
+	err = limiter.BeginGetChunkOperation(now, userID1)
+	require.Error(t, err)
+	err = limiter.BeginGetChunkOperation(now, userID2)
+	require.Error(t, err)
+}
+
+func TestBandwidthLimitPerClient(t *testing.T) {
+	tu.InitializeRandom()
+
+	config := DefaultConfig()
+	config.MaxGetChunkBytesPerSecondClient = float64(1024 + rand.Intn(1024*1024))
+	config.GetChunkBytesBurstinessClient = int(config.MaxGetChunkBytesPerSecondClient) + rand.Intn(1024*1024)
+	config.GetChunkBytesBurstiness = math.MaxInt32
+	config.GetChunkOpsBurstiness = math.MaxInt32
+	config.GetChunkOpsBurstinessClient = math.MaxInt32
+
+	userID1 := tu.RandomString(64)
+	userID2 := tu.RandomString(64)
+
+	limiter := NewChunkRateLimiter(config)
+
+	// time starts at current time, but advances manually afterward
+	now := time.Now()
+
+	// "register" the user IDs
+	err := limiter.BeginGetChunkOperation(now, userID1)
+	require.NoError(t, err)
+	limiter.FinishGetChunkOperation(userID1)
+	err = limiter.BeginGetChunkOperation(now, userID2)
+	require.NoError(t, err)
+	limiter.FinishGetChunkOperation(userID2)
+
+	// Request maximum possible bandwidth for client 1
+	bytesRemaining := config.GetChunkBytesBurstinessClient
+	for bytesRemaining > 0 {
+		bytesToRequest := 1 + rand.Intn(bytesRemaining)
+		err = limiter.RequestGetChunkBandwidth(now, userID1, bytesToRequest)
+		require.NoError(t, err)
+		bytesRemaining -= bytesToRequest
+	}
+
+	// Requesting one more byte should fail due to the bandwidth limit
+	err = limiter.RequestGetChunkBandwidth(now, userID1, 1)
+	require.Error(t, err)
+
+	// User 2 should have its full bandwidth allowance available
+	bytesRemaining = config.GetChunkBytesBurstinessClient
+	for bytesRemaining > 0 {
+		bytesToRequest := 1 + rand.Intn(bytesRemaining)
+		err = limiter.RequestGetChunkBandwidth(now, userID2, bytesToRequest)
+		require.NoError(t, err)
+		bytesRemaining -= bytesToRequest
+	}
+
+	// Requesting one more byte should fail due to the bandwidth limit
+	err = limiter.RequestGetChunkBandwidth(now, userID2, 1)
+	require.Error(t, err)
+
+	// Advance time by one second. We should gain a number of tokens equal to the rate limit.
+ now = now.Add(time.Second) + bytesRemaining = int(config.MaxGetChunkBytesPerSecondClient) + for bytesRemaining > 0 { + bytesToRequest := 1 + rand.Intn(bytesRemaining) + err = limiter.RequestGetChunkBandwidth(now, userID1, bytesToRequest) + require.NoError(t, err) + err = limiter.RequestGetChunkBandwidth(now, userID2, bytesToRequest) + require.NoError(t, err) + bytesRemaining -= bytesToRequest + } + + // All bandwidth should now be exhausted for both clients + err = limiter.RequestGetChunkBandwidth(now, userID1, 1) + require.Error(t, err) + err = limiter.RequestGetChunkBandwidth(now, userID2, 1) + require.Error(t, err) +} From 79d96147eca61fb220b1711cc42c31943b548036 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 11:13:18 -0600 Subject: [PATCH 07/45] Fix tests. Signed-off-by: Cody Littley --- relay/config.go | 27 ++++++++-------- relay/relay_test_utils.go | 2 +- relay/server.go | 68 ++++++++++++++++++++++++++------------- relay/server_test.go | 12 +++++++ 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/relay/config.go b/relay/config.go index d0c17fed95..10f6632513 100644 --- a/relay/config.go +++ b/relay/config.go @@ -89,19 +89,20 @@ type Config struct { // DefaultConfig returns the default configuration for the relay Server. func DefaultConfig() *Config { return &Config{ - Log: common.DefaultLoggerConfig(), - AWS: *aws.DefaultClientConfig(), - GRPCPort: 50051, - MaxGRPCMessageSize: 1024 * 1024 * 300, - BucketName: "relay", - MetadataTableName: "metadata", - MetadataCacheSize: 1024 * 1024, - MetadataMaxConcurrency: 32, - BlobCacheSize: 32, - BlobMaxConcurrency: 32, - ChunkCacheSize: 32, - ChunkMaxConcurrency: 32, - RateLimits: *limiter.DefaultConfig(), + Log: common.DefaultLoggerConfig(), + AWS: *aws.DefaultClientConfig(), + GRPCPort: 50051, + MaxGRPCMessageSize: 1024 * 1024 * 300, + BucketName: "relay", + MetadataTableName: "metadata", + MetadataCacheSize: 1024 * 1024, + MetadataMaxConcurrency: 32, + BlobCacheSize: 32, + BlobMaxConcurrency: 32, + ChunkCacheSize: 32, + ChunkMaxConcurrency: 32, + MaxKeysPerGetChunksRequest: 1024, + RateLimits: *limiter.DefaultConfig(), } } diff --git a/relay/relay_test_utils.go b/relay/relay_test_utils.go index 3c6d297b9e..01a4ec1056 100644 --- a/relay/relay_test_utils.go +++ b/relay/relay_test_utils.go @@ -177,7 +177,7 @@ func buildChunkStore(t *testing.T, logger logging.Logger) (chunkstore.ChunkReade func randomBlob(t *testing.T) (*v2.BlobHeader, []byte) { - data := tu.RandomBytes(128) + data := tu.RandomBytes(225) // TODO talk to Ian about this data = codec.ConvertByPaddingEmptyByte(data) commitments, err := prover.GetCommitments(data) diff --git a/relay/server.go b/relay/server.go index 0da44b668b..c3bc217254 100644 --- a/relay/server.go +++ b/relay/server.go @@ -27,6 +27,7 @@ type Server struct { // config is the configuration for the relay Server. 
config *Config + // the logger for the server logger logging.Logger @@ -96,6 +97,7 @@ func NewServer( } return &Server{ + config: config, logger: logger, grpcPort: config.GRPCPort, maxProtoSize: config.MaxGRPCMessageSize, @@ -166,13 +168,48 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* "too many chunk requests provided, max is %d", s.config.MaxKeysPerGetChunksRequest) } - clientID := fmt.Sprintf("%d", request.RequesterId) //TODO + // Future work: client IDs will be fixed when authentication is implemented + clientID := fmt.Sprintf("%d", request.RequesterId) err := s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID) if err != nil { return nil, err } defer s.chunkRateLimiter.FinishGetChunkOperation(clientID) + keys, err := getKeysFromChunkRequest(request) + if err != nil { + return nil, err + } + + mMap, err := s.metadataProvider.GetMetadataForBlobs(keys) + if err != nil { + return nil, fmt.Errorf( + "error fetching metadata for blob, check if blob exists and is assigned to this relay: %w", err) + } + + requiredBandwidth := computeChunkRequestRequiredBandwidth(request, mMap) + err = s.chunkRateLimiter.RequestGetChunkBandwidth(time.Now(), clientID, requiredBandwidth) + if err != nil { + return nil, err + } + + frames, err := s.chunkProvider.GetFrames(ctx, mMap) + if err != nil { + return nil, fmt.Errorf("error fetching frames: %w", err) + } + + bytesToSend, err := gatherChunkDataToSend(frames, request) + if err != nil { + return nil, fmt.Errorf("error gathering chunk data: %w", err) + } + + return &pb.GetChunksReply{ + Data: bytesToSend, + }, nil +} + +// getKeysFromChunkRequest gathers a slice of blob keys from a GetChunks request. +func getKeysFromChunkRequest(request *pb.GetChunksRequest) ([]v2.BlobKey, error) { keys := make([]v2.BlobKey, 0, len(request.ChunkRequests)) for _, chunkRequest := range request.ChunkRequests { @@ -193,26 +230,16 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* keys = append(keys, key) } - mMap, err := s.metadataProvider.GetMetadataForBlobs(keys) - if err != nil { - return nil, fmt.Errorf( - "error fetching metadata for blob, check if blob exists and is assigned to this relay: %w", err) - } - - requiredBandwidth := computeChunkRequestRequiredBandwidth(request, mMap) - err = s.chunkRateLimiter.RequestGetChunkBandwidth(time.Now(), clientID, requiredBandwidth) - if err != nil { - return nil, err - } + return keys, nil +} - frames, err := s.chunkProvider.GetFrames(ctx, mMap) - if err != nil { - return nil, fmt.Errorf("error fetching frames: %w", err) - } +// gatherChunkDataToSend takes the chunk data and narrows it down to the data requested in the GetChunks request. +func gatherChunkDataToSend( + frames map[v2.BlobKey][]*encoding.Frame, + request *pb.GetChunksRequest) ([][]byte, error) { - bytesToSend := make([][]byte, 0, len(keys)) + bytesToSend := make([][]byte, 0, len(frames)) - // return data in the order that it was requested for _, chunkRequest := range request.ChunkRequests { framesToSend := make([]*encoding.Frame, 0) @@ -261,12 +288,9 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* bytesToSend = append(bytesToSend, bundleBytes) } - return &pb.GetChunksReply{ - Data: bytesToSend, - }, nil + return bytesToSend, nil } -// TODO unit test // computeChunkRequestRequiredBandwidth computes the bandwidth required to fulfill a GetChunks request. 
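+// For example (illustrative numbers): a by-range request for chunks [10, 20) of a blob whose
+// chunkSizeBytes is 512 contributes (20-10)*512 = 5120 bytes to the total.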
func computeChunkRequestRequiredBandwidth(request *pb.GetChunksRequest, mMap metadataMap) int { requiredBandwidth := 0 diff --git a/relay/server_test.go b/relay/server_test.go index 637fd9b7a7..583d64c365 100644 --- a/relay/server_test.go +++ b/relay/server_test.go @@ -305,6 +305,10 @@ func TestReadWriteChunks(t *testing.T) { // This is the server used to read it back config := DefaultConfig() + config.RateLimits.MaxGetChunkOpsPerSecond = 1000 + config.RateLimits.GetChunkOpsBurstiness = 1000 + config.RateLimits.MaxGetChunkOpsPerSecondClient = 1000 + config.RateLimits.GetChunkOpsBurstinessClient = 1000 server, err := NewServer( context.Background(), logger, @@ -621,6 +625,10 @@ func TestReadWriteChunksWithSharding(t *testing.T) { // This is the server used to read it back config := DefaultConfig() config.RelayIDs = shardList + config.RateLimits.MaxGetChunkOpsPerSecond = 1000 + config.RateLimits.GetChunkOpsBurstiness = 1000 + config.RateLimits.MaxGetChunkOpsPerSecondClient = 1000 + config.RateLimits.GetChunkOpsBurstinessClient = 1000 server, err := NewServer( context.Background(), logger, @@ -891,6 +899,10 @@ func TestBatchedReadWriteChunksWithSharding(t *testing.T) { // This is the server used to read it back config := DefaultConfig() config.RelayIDs = shardList + config.RateLimits.MaxGetChunkOpsPerSecond = 1000 + config.RateLimits.GetChunkOpsBurstiness = 1000 + config.RateLimits.MaxGetChunkOpsPerSecondClient = 1000 + config.RateLimits.GetChunkOpsBurstinessClient = 1000 server, err := NewServer( context.Background(), logger, From c1d83e6cd88e285495173178a4d7b027d743fea4 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 11:21:52 -0600 Subject: [PATCH 08/45] Cleanup. Signed-off-by: Cody Littley --- relay/limiter/chunk_rate_limiter.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/relay/limiter/chunk_rate_limiter.go b/relay/limiter/chunk_rate_limiter.go index b2249ab5f8..96f62bd4bf 100644 --- a/relay/limiter/chunk_rate_limiter.go +++ b/relay/limiter/chunk_rate_limiter.go @@ -7,8 +7,6 @@ import ( "time" ) -// TODO test - // ChunkRateLimiter enforces rate limits on GetChunk operations. type ChunkRateLimiter struct { From ed4daca9ab9cb92d70a48292d77cd25305b6ec0a Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 13:36:41 -0600 Subject: [PATCH 09/45] Added get chunks request hashing. Signed-off-by: Cody Littley --- api/grpc/relay/relay.pb.go | 8 +-- api/proto/relay/relay.proto | 2 +- relay/authentication/request_hashing.go | 42 ++++++++++++ relay/authentication/request_hashing_test.go | 71 ++++++++++++++++++++ 4 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 relay/authentication/request_hashing.go create mode 100644 relay/authentication/request_hashing_test.go diff --git a/api/grpc/relay/relay.pb.go b/api/grpc/relay/relay.pb.go index d01287a918..14fdac87ca 100644 --- a/api/grpc/relay/relay.pb.go +++ b/api/grpc/relay/relay.pb.go @@ -128,7 +128,7 @@ type GetChunksRequest struct { ChunkRequests []*ChunkRequest `protobuf:"bytes,1,rep,name=chunk_requests,json=chunkRequests,proto3" json:"chunk_requests,omitempty"` // If this is an authenticated request, this should hold the ID of the requester. If this // is an unauthenticated request, this field should be empty. 
- RequesterId uint64 `protobuf:"varint,2,opt,name=requester_id,json=requesterId,proto3" json:"requester_id,omitempty"` + RequesterId []byte `protobuf:"bytes,2,opt,name=requester_id,json=requesterId,proto3" json:"requester_id,omitempty"` // If this is an authenticated request, this field will hold a signature by the requester // on the chunks being requested. RequesterSignature []byte `protobuf:"bytes,3,opt,name=requester_signature,json=requesterSignature,proto3" json:"requester_signature,omitempty"` @@ -173,11 +173,11 @@ func (x *GetChunksRequest) GetChunkRequests() []*ChunkRequest { return nil } -func (x *GetChunksRequest) GetRequesterId() uint64 { +func (x *GetChunksRequest) GetRequesterId() []byte { if x != nil { return x.RequesterId } - return 0 + return nil } func (x *GetChunksRequest) GetRequesterSignature() []byte { @@ -462,7 +462,7 @@ var file_relay_relay_proto_rawDesc = []byte{ 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x0d, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x65, 0x72, 0x49, 0x64, 0x12, 0x2f, 0x0a, 0x13, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x12, 0x72, 0x65, 0x71, 0x75, diff --git a/api/proto/relay/relay.proto b/api/proto/relay/relay.proto index ed52c0a0a5..82e405dbfe 100644 --- a/api/proto/relay/relay.proto +++ b/api/proto/relay/relay.proto @@ -34,7 +34,7 @@ message GetChunksRequest { // If this is an authenticated request, this should hold the ID of the requester. If this // is an unauthenticated request, this field should be empty. - uint64 requester_id = 2; + bytes requester_id = 2; // If this is an authenticated request, this field will hold a signature by the requester // on the chunks being requested. diff --git a/relay/authentication/request_hashing.go b/relay/authentication/request_hashing.go new file mode 100644 index 0000000000..37ac9a6b2f --- /dev/null +++ b/relay/authentication/request_hashing.go @@ -0,0 +1,42 @@ +package authentication + +import ( + "encoding/binary" + pb "github.com/Layr-Labs/eigenda/api/grpc/relay" + "golang.org/x/crypto/sha3" +) + +// HashGetChunksRequest hashes the given GetChunksRequest. +func HashGetChunksRequest(request *pb.GetChunksRequest) []byte { + + // Protobuf serialization is non-deterministic, so we can't just hash the + // serialized bytes. Instead, we have to define our own hashing function. 
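+	// The hash is deliberately order-sensitive: the requester ID is absorbed first, then
+	// each chunk request in the order given, with all uint32 values encoded big-endian.
+	// The requester signature is excluded, since this hash is the message being signed.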
+ + hasher := sha3.NewLegacyKeccak256() + + hasher.Write(request.GetRequesterId()) + for _, chunkRequest := range request.GetChunkRequests() { + if chunkRequest.GetByIndex() != nil { + getByIndex := chunkRequest.GetByIndex() + hasher.Write(getByIndex.BlobKey) + for _, index := range getByIndex.ChunkIndices { + indexBytes := make([]byte, 4) + binary.BigEndian.PutUint32(indexBytes, index) + hasher.Write(indexBytes) + } + } else { + getByRange := chunkRequest.GetByRange() + hasher.Write(getByRange.BlobKey) + + startBytes := make([]byte, 4) + binary.BigEndian.PutUint32(startBytes, getByRange.StartIndex) + hasher.Write(startBytes) + + endBytes := make([]byte, 4) + binary.BigEndian.PutUint32(endBytes, getByRange.EndIndex) + hasher.Write(endBytes) + } + } + + return hasher.Sum(nil) +} diff --git a/relay/authentication/request_hashing_test.go b/relay/authentication/request_hashing_test.go new file mode 100644 index 0000000000..7dcd480dc9 --- /dev/null +++ b/relay/authentication/request_hashing_test.go @@ -0,0 +1,71 @@ +package authentication + +import ( + pb "github.com/Layr-Labs/eigenda/api/grpc/relay" + tu "github.com/Layr-Labs/eigenda/common/testutils" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" + "testing" +) + +func randomGetChunksRequest() *pb.GetChunksRequest { + requestedChunks := make([]*pb.ChunkRequest, 0) + requestCount := rand.Intn(10) + 1 + for i := 0; i < requestCount; i++ { + + if rand.Intn(2) == 0 { + indices := make([]uint32, rand.Intn(10)+1) + for j := 0; j < len(indices); j++ { + indices[j] = rand.Uint32() + } + requestedChunks = append(requestedChunks, &pb.ChunkRequest{ + Request: &pb.ChunkRequest_ByIndex{ + ByIndex: &pb.ChunkRequestByIndex{ + BlobKey: tu.RandomBytes(32), + ChunkIndices: indices, + }, + }, + }) + } else { + requestedChunks = append(requestedChunks, &pb.ChunkRequest{ + Request: &pb.ChunkRequest_ByRange{ + ByRange: &pb.ChunkRequestByRange{ + BlobKey: tu.RandomBytes(32), + StartIndex: rand.Uint32(), + EndIndex: rand.Uint32(), + }, + }, + }) + } + } + return &pb.GetChunksRequest{ + RequesterId: tu.RandomBytes(32), + ChunkRequests: requestedChunks, + } +} + +func TestHashGetChunksRequest(t *testing.T) { + tu.InitializeRandom() + + requestA := randomGetChunksRequest() + requestB := randomGetChunksRequest() + + // Hashing the same request twice should yield the same hash + hashA := HashGetChunksRequest(requestA) + hashAA := HashGetChunksRequest(requestA) + require.Equal(t, hashA, hashAA) + + // Hashing different requests should yield different hashes + hashB := HashGetChunksRequest(requestB) + require.NotEqual(t, hashA, hashB) + + // Adding a signature should not affect the hash + requestA.RequesterSignature = tu.RandomBytes(32) + hashAA = HashGetChunksRequest(requestA) + require.Equal(t, hashA, hashAA) + + // Changing the requester ID should change the hash + requestA.RequesterId = tu.RandomBytes(32) + hashAA = HashGetChunksRequest(requestA) + require.NotEqual(t, hashA, hashAA) +} From 3438b92ae707a2d55ff1842d31bee2fce4350931 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 14:54:46 -0600 Subject: [PATCH 10/45] Start work on authenticator. 
Signed-off-by: Cody Littley --- relay/authentication/authenticator.go | 159 ++++++++++++++++++++++++++ relay/server.go | 26 ++++- 2 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 relay/authentication/authenticator.go diff --git a/relay/authentication/authenticator.go b/relay/authentication/authenticator.go new file mode 100644 index 0000000000..a7622bb24a --- /dev/null +++ b/relay/authentication/authenticator.go @@ -0,0 +1,159 @@ +package authentication + +import ( + "context" + "errors" + "fmt" + pb "github.com/Layr-Labs/eigenda/api/grpc/relay" + "github.com/Layr-Labs/eigenda/core" + "sync" + "time" +) + +// RequestAuthenticator authenticates requests to the relay service. This object is thread safe. +type RequestAuthenticator interface { + // AuthenticateGetChunksRequest authenticates a GetChunksRequest, returning an error if the request is invalid. + // The address is the address of the peer that sent the request. This may be used to cache authentication results + // in order to save server resources. + AuthenticateGetChunksRequest( + address string, + request *pb.GetChunksRequest, + now time.Time) error +} + +// authenticationTimeout is used to track the expiration of an authentication. +type authenticationTimeout struct { + clientID string + expiration time.Time +} + +var _ RequestAuthenticator = &requestAuthenticator{} + +type requestAuthenticator struct { + ics core.IndexedChainState + + // authenticatedClients is a set of client IDs that have been recently authenticated. + authenticatedClients map[string]struct{} + + // authenticationTimeouts is a list of authentications that have been performed, along with their expiration times. + authenticationTimeouts []*authenticationTimeout + + // authenticationTimeoutDuration is the duration for which an authentication is valid. + // If this is zero, then authentication saving is disabled, and each request will be authenticated independently. + authenticationTimeoutDuration time.Duration + + // savedAuthLock is used for thread safe atomic modification of the authenticatedClients map and the + // authenticationTimeouts queue. + savedAuthLock sync.Mutex +} + +// NewRequestAuthenticator creates a new RequestAuthenticator. +func NewRequestAuthenticator( + ics core.IndexedChainState, + authenticationTimeoutDuration time.Duration) RequestAuthenticator { + + return &requestAuthenticator{ + ics: ics, + authenticatedClients: make(map[string]struct{}), + authenticationTimeouts: make([]*authenticationTimeout, 0), + authenticationTimeoutDuration: authenticationTimeoutDuration, + } +} + +func (a *requestAuthenticator) AuthenticateGetChunksRequest( + address string, + request *pb.GetChunksRequest, + now time.Time) error { + + if a == nil { + // do not enforce authentication if the authenticator is nil + return nil + } + + if a.isAuthenticationStillValid(now, address) { + // We've recently authenticated this client. Do not authenticate again for a while. 
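+		// The cache key is the peer's network address, so a client that keeps reusing
+		// the same connection skips repeated BLS signature verification until its
+		// entry expires.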
+ return nil + } + + blockNumber, err := a.ics.GetCurrentBlockNumber() + if err != nil { + return fmt.Errorf("failed to get current block number: %w", err) + } + operators, err := a.ics.GetIndexedOperators(context.Background(), blockNumber) + if err != nil { + return fmt.Errorf("failed to get operators: %w", err) + } + + operatorID := core.OperatorID(request.RequesterId) + operator, ok := operators[operatorID] + if !ok { + return errors.New("operator not found") + } + key := operator.PubkeyG2 + + g1Point, err := (&core.G1Point{}).Deserialize(request.RequesterSignature) + if err != nil { + return fmt.Errorf("failed to deserialize signature: %w", err) + } + + signature := core.Signature{ + G1Point: g1Point, + } + + hash := HashGetChunksRequest(request) + isValid := signature.Verify(key, ([32]byte)(hash)) + + if !isValid { + return errors.New("signature verification failed") + } + + a.saveAuthenticationResult(now, address) + return nil +} + +// saveAuthenticationResult saves the result of an authentication. +func (a *requestAuthenticator) saveAuthenticationResult(now time.Time, address string) { + if a.authenticationTimeoutDuration == 0 { + // Authentication saving is disabled. + return + } + + a.savedAuthLock.Lock() + defer a.savedAuthLock.Unlock() + + a.authenticatedClients[address] = struct{}{} + a.authenticationTimeouts = append(a.authenticationTimeouts, + &authenticationTimeout{ + clientID: address, + expiration: now.Add(a.authenticationTimeoutDuration), + }) +} + +// isAuthenticationStillValid returns true if the client at the given address has been authenticated recently. +func (a *requestAuthenticator) isAuthenticationStillValid(now time.Time, address string) bool { + if a.authenticationTimeoutDuration == 0 { + // Authentication saving is disabled. + return false + } + + a.savedAuthLock.Lock() + defer a.savedAuthLock.Unlock() + + a.removeOldAuthentications(now) + _, ok := a.authenticatedClients[address] + return ok +} + +// removeOldAuthentications removes any authentications that have expired. +func (a *requestAuthenticator) removeOldAuthentications(now time.Time) { + index := 0 + for ; index < len(a.authenticationTimeouts); index++ { + if a.authenticationTimeouts[index].expiration.After(now) { + break + } + delete(a.authenticatedClients, a.authenticationTimeouts[index].clientID) + } + if index > 0 { + a.authenticationTimeouts = a.authenticationTimeouts[index:] + } +} diff --git a/relay/server.go b/relay/server.go index c3bc217254..00d74ee80d 100644 --- a/relay/server.go +++ b/relay/server.go @@ -10,10 +10,12 @@ import ( "github.com/Layr-Labs/eigenda/core/v2" "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore" "github.com/Layr-Labs/eigenda/encoding" + "github.com/Layr-Labs/eigenda/relay/authentication" "github.com/Layr-Labs/eigenda/relay/chunkstore" "github.com/Layr-Labs/eigenda/relay/limiter" "github.com/Layr-Labs/eigensdk-go/logging" "google.golang.org/grpc" + "google.golang.org/grpc/peer" "google.golang.org/grpc/reflection" "net" "time" @@ -54,6 +56,9 @@ type Server struct { // grpcServer is the gRPC server. grpcServer *grpc.Server + + // authenticator is used to authenticate requests to the relay service. + authenticator authentication.RequestAuthenticator // TODO set this } // NewServer creates a new relay Server. 
@@ -96,6 +101,9 @@ func NewServer( return nil, fmt.Errorf("error creating chunk provider: %w", err) } + // TODO + authenticator := authentication.NewRequestAuthenticator(nil, 0) + return &Server{ config: config, logger: logger, @@ -106,6 +114,7 @@ func NewServer( chunkProvider: cp, blobRateLimiter: limiter.NewBlobRateLimiter(&config.RateLimits), chunkRateLimiter: limiter.NewChunkRateLimiter(&config.RateLimits), + authenticator: authenticator, }, nil } @@ -168,9 +177,20 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* "too many chunk requests provided, max is %d", s.config.MaxKeysPerGetChunksRequest) } - // Future work: client IDs will be fixed when authentication is implemented - clientID := fmt.Sprintf("%d", request.RequesterId) - err := s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID) + client, ok := peer.FromContext(ctx) + if !ok { + return nil, errors.New("could not get peer information") + } + clientAddress := client.Addr.String() + + err := s.authenticator.AuthenticateGetChunksRequest(clientAddress, request, time.Now()) + if err != nil { + return nil, fmt.Errorf("authentication failed: %w", err) + } + // TODO make methods take correct type + clientID := fmt.Sprintf("%x", request.RequesterId) + + err = s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID) if err != nil { return nil, err } From b5dc37c77c6f55d1a3dc6fdac80b7c43368efe0d Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 14:58:29 -0600 Subject: [PATCH 11/45] Fix test issue. Signed-off-by: Cody Littley --- relay/limiter/chunk_rate_limiter_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/relay/limiter/chunk_rate_limiter_test.go b/relay/limiter/chunk_rate_limiter_test.go index c30349211e..482f3e536b 100644 --- a/relay/limiter/chunk_rate_limiter_test.go +++ b/relay/limiter/chunk_rate_limiter_test.go @@ -10,12 +10,13 @@ import ( ) func TestConcurrentGetChunksOperations(t *testing.T) { - tu.InitializeRandom() + tu.InitializeRandom(1172102200317107997) // TODO concurrencyLimit := 1 + rand.Intn(10) config := DefaultConfig() config.MaxConcurrentGetChunkOps = concurrencyLimit + config.MaxConcurrentGetChunkOpsClient = math.MaxInt32 config.GetChunkOpsBurstiness = math.MaxInt32 config.GetChunkOpsBurstinessClient = math.MaxInt32 From b420ccaf697b49a4be5285d7bd95a5e439b9ef36 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 18 Nov 2024 15:05:55 -0600 Subject: [PATCH 12/45] Cleanup Signed-off-by: Cody Littley --- relay/limiter/chunk_rate_limiter_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/limiter/chunk_rate_limiter_test.go b/relay/limiter/chunk_rate_limiter_test.go index 482f3e536b..44f1aeca41 100644 --- a/relay/limiter/chunk_rate_limiter_test.go +++ b/relay/limiter/chunk_rate_limiter_test.go @@ -10,7 +10,7 @@ import ( ) func TestConcurrentGetChunksOperations(t *testing.T) { - tu.InitializeRandom(1172102200317107997) // TODO + tu.InitializeRandom() concurrencyLimit := 1 + rand.Intn(10) From 7982aec4cf25fa2c9cc7debf2a0b3e80c60c5922 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 10:27:15 -0600 Subject: [PATCH 13/45] Convert config to flag pattern. 
Signed-off-by: Cody Littley --- relay/cmd/config.go | 33 ++++--- relay/cmd/flags/flags.go | 119 +++++++++++++++++++++++ relay/limiter/blob_rate_limiter_test.go | 26 ++++- relay/limiter/chunk_rate_limiter_test.go | 12 +-- relay/limiter/config.go | 23 ----- relay/server.go | 15 --- relay/server_test.go | 23 +++-- 7 files changed, 183 insertions(+), 68 deletions(-) diff --git a/relay/cmd/config.go b/relay/cmd/config.go index c7b8b46fcc..bb7566f5a1 100644 --- a/relay/cmd/config.go +++ b/relay/cmd/config.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "github.com/Layr-Labs/eigenda/relay/limiter" "github.com/Layr-Labs/eigenda/common" "github.com/Layr-Labs/eigenda/common/aws" @@ -12,21 +13,6 @@ import ( ) // Config is the configuration for the relay Server. -// -// Environment variables are mapped into this struct by taking the name of the field in this struct, -// converting to upper case, and prepending "RELAY_". For example, "BlobCacheSize" can be set using the -// environment variable "RELAY_BLOBCACHESIZE". -// -// For nested structs, add the name of the struct variable before the field name, separated by an underscore. -// For example, "Log.Format" can be set using the environment variable "RELAY_LOG_FORMAT". -// -// Slice values can be set using a comma-separated list. For example, "RelayIDs" can be set using the environment -// variable "RELAY_RELAYIDS='1,2,3,4'". -// -// It is also possible to set the configuration using a configuration file. The path to the configuration file should -// be passed as the first argument to the relay binary, e.g. "bin/relay config.yaml". The structure of the config -// file should mirror the structure of this struct, with keys in the config file matching the field names -// of this struct. type Config struct { // Log is the configuration for the logger. Default is common.DefaultLoggerConfig(). 
@@ -70,6 +56,23 @@ func NewConfig(ctx *cli.Context) (Config, error) { BlobMaxConcurrency: ctx.Int(flags.BlobMaxConcurrencyFlag.Name), ChunkCacheSize: ctx.Int(flags.ChunkCacheSizeFlag.Name), ChunkMaxConcurrency: ctx.Int(flags.ChunkMaxConcurrencyFlag.Name), + RateLimits: limiter.Config{ + MaxGetBlobOpsPerSecond: ctx.Float64(flags.MaxGetBlobOpsPerSecondFlag.Name), + GetBlobOpsBurstiness: ctx.Int(flags.GetBlobOpsBurstinessFlag.Name), + MaxGetBlobBytesPerSecond: ctx.Float64(flags.MaxGetBlobBytesPerSecondFlag.Name), + GetBlobBytesBurstiness: ctx.Int(flags.GetBlobBytesBurstinessFlag.Name), + MaxConcurrentGetBlobOps: ctx.Int(flags.MaxConcurrentGetBlobOpsFlag.Name), + MaxGetChunkOpsPerSecond: ctx.Float64(flags.MaxGetChunkOpsPerSecondFlag.Name), + GetChunkOpsBurstiness: ctx.Int(flags.GetChunkOpsBurstinessFlag.Name), + MaxGetChunkBytesPerSecond: ctx.Float64(flags.MaxGetChunkBytesPerSecondFlag.Name), + GetChunkBytesBurstiness: ctx.Int(flags.GetChunkBytesBurstinessFlag.Name), + MaxConcurrentGetChunkOps: ctx.Int(flags.MaxConcurrentGetChunkOpsFlag.Name), + MaxGetChunkOpsPerSecondClient: ctx.Float64(flags.MaxGetChunkOpsPerSecondClientFlag.Name), + GetChunkOpsBurstinessClient: ctx.Int(flags.GetChunkOpsBurstinessClientFlag.Name), + MaxGetChunkBytesPerSecondClient: ctx.Float64(flags.MaxGetChunkBytesPerSecondClientFlag.Name), + GetChunkBytesBurstinessClient: ctx.Int(flags.GetChunkBytesBurstinessClientFlag.Name), + MaxConcurrentGetChunkOpsClient: ctx.Int(flags.MaxConcurrentGetChunkOpsClientFlag.Name), + }, }, } for i, id := range relayIDs { diff --git a/relay/cmd/flags/flags.go b/relay/cmd/flags/flags.go index 63e63369e5..9abd673566 100644 --- a/relay/cmd/flags/flags.go +++ b/relay/cmd/flags/flags.go @@ -85,6 +85,110 @@ var ( EnvVar: common.PrefixEnvVar(envVarPrefix, "CHUNK_MAX_CONCURRENCY"), Value: 32, } + MaxGetBlobOpsPerSecondFlag = cli.Float64Flag{ + Name: common.PrefixFlag(FlagPrefix, "max-get-blob-ops-per-second"), + Usage: "Max number of GetBlob operations per second", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "MAX_GET_BLOB_OPS_PER_SECOND"), + Value: 1024, + } + GetBlobOpsBurstinessFlag = cli.IntFlag{ + Name: common.PrefixFlag(FlagPrefix, "get-blob-ops-burstiness"), + Usage: "Burstiness of the GetBlob rate limiter", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "GET_BLOB_OPS_BURSTINESS"), + Value: 1024, + } + MaxGetBlobBytesPerSecondFlag = cli.Float64Flag{ + Name: common.PrefixFlag(FlagPrefix, "max-get-blob-bytes-per-second"), + Usage: "Max bandwidth for GetBlob operations in bytes per second", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "MAX_GET_BLOB_BYTES_PER_SECOND"), + Value: 20 * 1024 * 1024, + } + GetBlobBytesBurstinessFlag = cli.IntFlag{ + Name: common.PrefixFlag(FlagPrefix, "get-blob-bytes-burstiness"), + Usage: "Burstiness of the GetBlob bandwidth rate limiter", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "GET_BLOB_BYTES_BURSTINESS"), + Value: 20 * 1024 * 1024, + } + MaxConcurrentGetBlobOpsFlag = cli.IntFlag{ + Name: common.PrefixFlag(FlagPrefix, "max-concurrent-get-blob-ops"), + Usage: "Max number of concurrent GetBlob operations", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "MAX_CONCURRENT_GET_BLOB_OPS"), + Value: 1024, + } + MaxGetChunkOpsPerSecondFlag = cli.Float64Flag{ + Name: common.PrefixFlag(FlagPrefix, "max-get-chunk-ops-per-second"), + Usage: "Max number of GetChunk operations per second", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "MAX_GET_CHUNK_OPS_PER_SECOND"), + 
Value:    1024,
+	}
+	GetChunkOpsBurstinessFlag = cli.IntFlag{
+		Name:     common.PrefixFlag(FlagPrefix, "get-chunk-ops-burstiness"),
+		Usage:    "Burstiness of the GetChunk rate limiter",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "GET_CHUNK_OPS_BURSTINESS"),
+		Value:    1024,
+	}
+	MaxGetChunkBytesPerSecondFlag = cli.Float64Flag{
+		Name:     common.PrefixFlag(FlagPrefix, "max-get-chunk-bytes-per-second"),
+		Usage:    "Max bandwidth for GetChunk operations in bytes per second",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "MAX_GET_CHUNK_BYTES_PER_SECOND"),
+		Value:    20 * 1024 * 1024,
+	}
+	GetChunkBytesBurstinessFlag = cli.IntFlag{
+		Name:     common.PrefixFlag(FlagPrefix, "get-chunk-bytes-burstiness"),
+		Usage:    "Burstiness of the GetChunk bandwidth rate limiter",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "GET_CHUNK_BYTES_BURSTINESS"),
+		Value:    20 * 1024 * 1024,
+	}
+	MaxConcurrentGetChunkOpsFlag = cli.IntFlag{
+		Name:     common.PrefixFlag(FlagPrefix, "max-concurrent-get-chunk-ops"),
+		Usage:    "Max number of concurrent GetChunk operations",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "MAX_CONCURRENT_GET_CHUNK_OPS"),
+		Value:    1024,
+	}
+	MaxGetChunkOpsPerSecondClientFlag = cli.Float64Flag{
+		Name:     common.PrefixFlag(FlagPrefix, "max-get-chunk-ops-per-second-client"),
+		Usage:    "Max number of GetChunk operations per second per client",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "MAX_GET_CHUNK_OPS_PER_SECOND_CLIENT"),
+		Value:    8,
+	}
+	GetChunkOpsBurstinessClientFlag = cli.IntFlag{
+		Name:     common.PrefixFlag(FlagPrefix, "get-chunk-ops-burstiness-client"),
+		Usage:    "Burstiness of the GetChunk rate limiter per client",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "GET_CHUNK_OPS_BURSTINESS_CLIENT"),
+		Value:    8,
+	}
+	MaxGetChunkBytesPerSecondClientFlag = cli.Float64Flag{
+		Name:     common.PrefixFlag(FlagPrefix, "max-get-chunk-bytes-per-second-client"),
+		Usage:    "Max bandwidth for GetChunk operations in bytes per second per client",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "MAX_GET_CHUNK_BYTES_PER_SECOND_CLIENT"),
+		Value:    2 * 1024 * 1024,
+	}
+	GetChunkBytesBurstinessClientFlag = cli.IntFlag{
+		Name:     common.PrefixFlag(FlagPrefix, "get-chunk-bytes-burstiness-client"),
+		Usage:    "Burstiness of the GetChunk bandwidth rate limiter per client",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "GET_CHUNK_BYTES_BURSTINESS_CLIENT"),
+		Value:    2 * 1024 * 1024,
+	}
+	MaxConcurrentGetChunkOpsClientFlag = cli.IntFlag{
+		Name:     common.PrefixFlag(FlagPrefix, "max-concurrent-get-chunk-ops-client"),
+		Usage:    "Max number of concurrent GetChunk operations per client",
+		Required: false,
+		EnvVar:   common.PrefixEnvVar(envVarPrefix, "MAX_CONCURRENT_GET_CHUNK_OPS_CLIENT"),
+		Value:    1,
+	}
 )

 var requiredFlags = []cli.Flag{
@@ -102,6 +206,21 @@ var optionalFlags = []cli.Flag{
 	BlobMaxConcurrencyFlag,
 	ChunkCacheSizeFlag,
 	ChunkMaxConcurrencyFlag,
+	MaxGetBlobOpsPerSecondFlag,
+	GetBlobOpsBurstinessFlag,
+	MaxGetBlobBytesPerSecondFlag,
+	GetBlobBytesBurstinessFlag,
+	MaxConcurrentGetBlobOpsFlag,
+	MaxGetChunkOpsPerSecondFlag,
+	GetChunkOpsBurstinessFlag,
+	MaxGetChunkBytesPerSecondFlag,
+	GetChunkBytesBurstinessFlag,
+	MaxConcurrentGetChunkOpsFlag,
+	MaxGetChunkOpsPerSecondClientFlag,
+	GetChunkOpsBurstinessClientFlag,
+	MaxGetChunkBytesPerSecondClientFlag,
+	GetChunkBytesBurstinessClientFlag,
+	MaxConcurrentGetChunkOpsClientFlag,
 }

 var Flags []cli.Flag

diff --git a/relay/limiter/blob_rate_limiter_test.go
b/relay/limiter/blob_rate_limiter_test.go index 3c6267ea12..2966b6bea0 100644 --- a/relay/limiter/blob_rate_limiter_test.go +++ b/relay/limiter/blob_rate_limiter_test.go @@ -8,12 +8,32 @@ import ( "time" ) +func defaultConfig() *Config { + return &Config{ + MaxGetBlobOpsPerSecond: 1024, + GetBlobOpsBurstiness: 1024, + MaxGetBlobBytesPerSecond: 20 * 1024 * 1024, + GetBlobBytesBurstiness: 20 * 1024 * 1024, + MaxConcurrentGetBlobOps: 1024, + MaxGetChunkOpsPerSecond: 1024, + GetChunkOpsBurstiness: 1024, + MaxGetChunkBytesPerSecond: 20 * 1024 * 1024, + GetChunkBytesBurstiness: 20 * 1024 * 1024, + MaxConcurrentGetChunkOps: 1024, + MaxGetChunkOpsPerSecondClient: 8, + GetChunkOpsBurstinessClient: 8, + MaxGetChunkBytesPerSecondClient: 2 * 1024 * 1024, + GetChunkBytesBurstinessClient: 2 * 1024 * 1024, + MaxConcurrentGetChunkOpsClient: 1, + } +} + func TestConcurrentBlobOperations(t *testing.T) { tu.InitializeRandom() concurrencyLimit := 1 + rand.Intn(10) - config := DefaultConfig() + config := defaultConfig() config.MaxConcurrentGetBlobOps = concurrencyLimit // Make the burstiness limit high enough that we won't be rate limited config.GetBlobOpsBurstiness = concurrencyLimit * 100 @@ -44,7 +64,7 @@ func TestConcurrentBlobOperations(t *testing.T) { func TestGetBlobOpRateLimit(t *testing.T) { tu.InitializeRandom() - config := DefaultConfig() + config := defaultConfig() config.MaxGetBlobOpsPerSecond = float64(2 + rand.Intn(10)) config.GetBlobOpsBurstiness = int(config.MaxGetBlobOpsPerSecond) + rand.Intn(10) config.MaxConcurrentGetBlobOps = 1 @@ -105,7 +125,7 @@ func TestGetBlobOpRateLimit(t *testing.T) { func TestGetBlobBandwidthLimit(t *testing.T) { tu.InitializeRandom() - config := DefaultConfig() + config := defaultConfig() config.MaxGetBlobBytesPerSecond = float64(1024 + rand.Intn(1024*1024)) config.GetBlobBytesBurstiness = int(config.MaxGetBlobBytesPerSecond) + rand.Intn(1024*1024) diff --git a/relay/limiter/chunk_rate_limiter_test.go b/relay/limiter/chunk_rate_limiter_test.go index 44f1aeca41..59399ca17f 100644 --- a/relay/limiter/chunk_rate_limiter_test.go +++ b/relay/limiter/chunk_rate_limiter_test.go @@ -14,7 +14,7 @@ func TestConcurrentGetChunksOperations(t *testing.T) { concurrencyLimit := 1 + rand.Intn(10) - config := DefaultConfig() + config := defaultConfig() config.MaxConcurrentGetChunkOps = concurrencyLimit config.MaxConcurrentGetChunkOpsClient = math.MaxInt32 config.GetChunkOpsBurstiness = math.MaxInt32 @@ -48,7 +48,7 @@ func TestConcurrentGetChunksOperations(t *testing.T) { func TestGetChunksRateLimit(t *testing.T) { tu.InitializeRandom() - config := DefaultConfig() + config := defaultConfig() config.MaxGetChunkOpsPerSecond = float64(2 + rand.Intn(10)) config.GetChunkOpsBurstiness = int(config.MaxGetChunkOpsPerSecond) + rand.Intn(10) config.GetChunkOpsBurstinessClient = math.MaxInt32 @@ -113,7 +113,7 @@ func TestGetChunksRateLimit(t *testing.T) { func TestGetChunksBandwidthLimit(t *testing.T) { tu.InitializeRandom() - config := DefaultConfig() + config := defaultConfig() config.MaxGetChunkBytesPerSecond = float64(1024 + rand.Intn(1024*1024)) config.GetChunkBytesBurstiness = int(config.MaxGetBlobBytesPerSecond) + rand.Intn(1024*1024) config.GetChunkBytesBurstinessClient = math.MaxInt32 @@ -161,7 +161,7 @@ func TestGetChunksBandwidthLimit(t *testing.T) { func TestPerClientConcurrencyLimit(t *testing.T) { tu.InitializeRandom() - config := DefaultConfig() + config := defaultConfig() config.MaxConcurrentGetChunkOpsClient = 1 + rand.Intn(10) config.MaxConcurrentGetChunkOps = 2 * 
config.MaxConcurrentGetChunkOpsClient config.GetChunkOpsBurstinessClient = math.MaxInt32 @@ -210,7 +210,7 @@ func TestPerClientConcurrencyLimit(t *testing.T) { func TestOpLimitPerClient(t *testing.T) { tu.InitializeRandom() - config := DefaultConfig() + config := defaultConfig() config.MaxGetChunkOpsPerSecondClient = float64(2 + rand.Intn(10)) config.GetChunkOpsBurstinessClient = int(config.MaxGetChunkOpsPerSecondClient) + rand.Intn(10) config.GetChunkOpsBurstiness = math.MaxInt32 @@ -266,7 +266,7 @@ func TestOpLimitPerClient(t *testing.T) { func TestBandwidthLimitPerClient(t *testing.T) { tu.InitializeRandom() - config := DefaultConfig() + config := defaultConfig() config.MaxGetChunkBytesPerSecondClient = float64(1024 + rand.Intn(1024*1024)) config.GetChunkBytesBurstinessClient = int(config.MaxGetBlobBytesPerSecond) + rand.Intn(1024*1024) config.GetChunkBytesBurstiness = math.MaxInt32 diff --git a/relay/limiter/config.go b/relay/limiter/config.go index 5b33d6ccd9..5f19d9362a 100644 --- a/relay/limiter/config.go +++ b/relay/limiter/config.go @@ -63,26 +63,3 @@ type Config struct { // Default is 1. MaxConcurrentGetChunkOpsClient int } - -// DefaultConfig returns a default rate limit configuration. -func DefaultConfig() *Config { - return &Config{ - MaxGetBlobOpsPerSecond: 1024, - GetBlobOpsBurstiness: 1024, - MaxGetBlobBytesPerSecond: 20 * 1024 * 1024, - GetBlobBytesBurstiness: 20 * 1024 * 1024, - MaxConcurrentGetBlobOps: 1024, - - MaxGetChunkOpsPerSecond: 1024, - GetChunkOpsBurstiness: 1024, - MaxGetChunkBytesPerSecond: 20 * 1024 * 1024, - GetChunkBytesBurstiness: 20 * 1024 * 1024, - MaxConcurrentGetChunkOps: 1024, - - MaxGetChunkOpsPerSecondClient: 8, - GetChunkOpsBurstinessClient: 8, - MaxGetChunkBytesPerSecondClient: 2 * 1024 * 1024, - GetChunkBytesBurstinessClient: 2 * 1024 * 1024, - MaxConcurrentGetChunkOpsClient: 1, - } -} diff --git a/relay/server.go b/relay/server.go index d73e7a7d30..a42a4bfed7 100644 --- a/relay/server.go +++ b/relay/server.go @@ -5,8 +5,6 @@ import ( "errors" "fmt" pb "github.com/Layr-Labs/eigenda/api/grpc/relay" - "github.com/Layr-Labs/eigenda/common" - "github.com/Layr-Labs/eigenda/common/aws" "github.com/Layr-Labs/eigenda/common/healthcheck" "github.com/Layr-Labs/eigenda/core" v2 "github.com/Layr-Labs/eigenda/core/v2" @@ -52,15 +50,8 @@ type Server struct { grpcServer *grpc.Server } -// Config is the configuration for the relay Server. type Config struct { - // Log is the configuration for the logger. - Log common.LoggerConfig - - // Configuration for the AWS client. - AWS aws.ClientConfig - // RelayIDs contains the IDs of the relays that this server is willing to serve data for. If empty, the server will // serve data for any shard it can. RelayIDs []v2.RelayKey @@ -68,12 +59,6 @@ type Config struct { // GRPCPort is the port that the relay server listens on. GRPCPort int - // BucketName is the name of the S3 bucket that stores blobs. - BucketName string - - // MetadataTableName is the name of the DynamoDB table that stores metadata. - MetadataTableName string - // MaxGRPCMessageSize is the maximum size of a gRPC message that the server will accept. 
MaxGRPCMessageSize int diff --git a/relay/server_test.go b/relay/server_test.go index 77b4cdd992..cedfa6ddb4 100644 --- a/relay/server_test.go +++ b/relay/server_test.go @@ -2,7 +2,6 @@ package relay import ( "context" - "github.com/Layr-Labs/eigenda/common/aws" "github.com/Layr-Labs/eigenda/relay/limiter" "math/rand" "testing" @@ -20,12 +19,8 @@ import ( func defaultConfig() *Config { return &Config{ - Log: common.DefaultLoggerConfig(), - AWS: *aws.DefaultClientConfig(), GRPCPort: 50051, MaxGRPCMessageSize: 1024 * 1024 * 300, - BucketName: "relay", - MetadataTableName: "metadata", MetadataCacheSize: 1024 * 1024, MetadataMaxConcurrency: 32, BlobCacheSize: 32, @@ -33,7 +28,23 @@ func defaultConfig() *Config { ChunkCacheSize: 32, ChunkMaxConcurrency: 32, MaxKeysPerGetChunksRequest: 1024, - RateLimits: *limiter.DefaultConfig(), + RateLimits: limiter.Config{ + MaxGetBlobOpsPerSecond: 1024, + GetBlobOpsBurstiness: 1024, + MaxGetBlobBytesPerSecond: 20 * 1024 * 1024, + GetBlobBytesBurstiness: 20 * 1024 * 1024, + MaxConcurrentGetBlobOps: 1024, + MaxGetChunkOpsPerSecond: 1024, + GetChunkOpsBurstiness: 1024, + MaxGetChunkBytesPerSecond: 20 * 1024 * 1024, + GetChunkBytesBurstiness: 20 * 1024 * 1024, + MaxConcurrentGetChunkOps: 1024, + MaxGetChunkOpsPerSecondClient: 8, + GetChunkOpsBurstinessClient: 8, + MaxGetChunkBytesPerSecondClient: 2 * 1024 * 1024, + GetChunkBytesBurstinessClient: 2 * 1024 * 1024, + MaxConcurrentGetChunkOpsClient: 1, + }, } } From 366fdf7838d11cad552637f816b01e04d3aefff4 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 11:02:55 -0600 Subject: [PATCH 14/45] Simplify rate limiter classes. Signed-off-by: Cody Littley --- relay/limiter/blob_rate_limiter.go | 31 ++++++++++----- relay/limiter/chunk_rate_limiter.go | 62 ++++++++++++++--------------- relay/server.go | 1 + 3 files changed, 52 insertions(+), 42 deletions(-) diff --git a/relay/limiter/blob_rate_limiter.go b/relay/limiter/blob_rate_limiter.go index 11dc12c7c9..0ac260cba8 100644 --- a/relay/limiter/blob_rate_limiter.go +++ b/relay/limiter/blob_rate_limiter.go @@ -3,7 +3,7 @@ package limiter import ( "fmt" "golang.org/x/time/rate" - "sync/atomic" + "sync" "time" ) @@ -21,7 +21,10 @@ type BlobRateLimiter struct { bandwidthLimiter *rate.Limiter // operationsInFlight is the number of GetBlob operations currently in flight. - operationsInFlight atomic.Int64 + operationsInFlight int + + // this lock is used to provide thread safety + lock sync.Mutex } // NewBlobRateLimiter creates a new BlobRateLimiter. 
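 //
 // A sketch of the intended call sequence (blobSize is illustrative; in practice the
 // byte count comes from the blob's metadata):
 //
 //	if err := limiter.BeginGetBlobOperation(time.Now()); err != nil {
 //		return err
 //	}
 //	defer limiter.FinishGetBlobOperation()
 //	if err := limiter.RequestGetBlobBandwidth(time.Now(), blobSize); err != nil {
 //		return err
 //	}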
@@ -50,18 +53,19 @@ func (l *BlobRateLimiter) BeginGetBlobOperation(now time.Time) error { return nil } - countInFlight := l.operationsInFlight.Add(1) - if countInFlight > int64(l.config.MaxConcurrentGetBlobOps) { - l.operationsInFlight.Add(-1) + l.lock.Lock() + defer l.lock.Unlock() + + if l.operationsInFlight >= l.config.MaxConcurrentGetBlobOps { return fmt.Errorf("global concurrent request limit exceeded for getBlob operations, try again later") } - - allowed := l.opLimiter.AllowN(now, 1) - - if !allowed { - l.operationsInFlight.Add(-1) + if l.opLimiter.TokensAt(now) < 1 { return fmt.Errorf("global rate limit exceeded for getBlob operations, try again later") } + + l.operationsInFlight++ + l.opLimiter.AllowN(now, 1) + return nil } @@ -73,7 +77,10 @@ func (l *BlobRateLimiter) FinishGetBlobOperation() { return } - l.operationsInFlight.Add(-1) + l.lock.Lock() + defer l.lock.Unlock() + + l.operationsInFlight-- } // RequestGetBlobBandwidth should be called when a GetBlob is about to start downloading blob data @@ -85,6 +92,8 @@ func (l *BlobRateLimiter) RequestGetBlobBandwidth(now time.Time, bytes uint32) e return nil } + // no locking needed, the only thing we touch here is the bandwidthLimiter, which is inherently thread-safe + allowed := l.bandwidthLimiter.AllowN(now, int(bytes)) if !allowed { return fmt.Errorf("global rate limit exceeded for getBlob bandwidth, try again later") diff --git a/relay/limiter/chunk_rate_limiter.go b/relay/limiter/chunk_rate_limiter.go index 96f62bd4bf..fe899e5b17 100644 --- a/relay/limiter/chunk_rate_limiter.go +++ b/relay/limiter/chunk_rate_limiter.go @@ -3,7 +3,7 @@ package limiter import ( "fmt" "golang.org/x/time/rate" - "sync/atomic" + "sync" "time" ) @@ -22,7 +22,7 @@ type ChunkRateLimiter struct { globalBandwidthLimiter *rate.Limiter // globalOperationsInFlight is the number of GetChunk operations currently in flight. - globalOperationsInFlight atomic.Int64 + globalOperationsInFlight int // per-client limiters @@ -38,7 +38,10 @@ type ChunkRateLimiter struct { perClientBandwidthLimiter map[string]*rate.Limiter // perClientOperationsInFlight is the number of GetChunk operations currently in flight for each client. - perClientOperationsInFlight map[string]*atomic.Int64 + perClientOperationsInFlight map[string]int + + // this lock is used to provide thread safety + lock sync.Mutex } // NewChunkRateLimiter creates a new ChunkRateLimiter. 
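 //
 // The intended call sequence mirrors BlobRateLimiter, keyed by requester ID (a sketch;
 // clientID and requiredBandwidth are illustrative):
 //
 //	if err := limiter.BeginGetChunkOperation(time.Now(), clientID); err != nil {
 //		return err
 //	}
 //	defer limiter.FinishGetChunkOperation(clientID)
 //	if err := limiter.RequestGetChunkBandwidth(time.Now(), clientID, requiredBandwidth); err != nil {
 //		return err
 //	}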
@@ -56,10 +59,9 @@ func NewChunkRateLimiter(config *Config) *ChunkRateLimiter { config: config, globalOpLimiter: globalOpLimiter, globalBandwidthLimiter: globalBandwidthLimiter, - globalOperationsInFlight: atomic.Int64{}, perClientOpLimiter: make(map[string]*rate.Limiter), perClientBandwidthLimiter: make(map[string]*rate.Limiter), - perClientOperationsInFlight: make(map[string]*atomic.Int64), + perClientOperationsInFlight: make(map[string]int), } } @@ -74,24 +76,13 @@ func (l *ChunkRateLimiter) BeginGetChunkOperation( return nil } - countInFlight := l.globalOperationsInFlight.Add(1) - if countInFlight > int64(l.config.MaxConcurrentGetChunkOps) { - l.globalOperationsInFlight.Add(-1) - return fmt.Errorf("global concurrent request limit exceeded for GetChunks operations, try again later") - } + l.lock.Lock() + defer l.lock.Unlock() - allowed := l.globalOpLimiter.AllowN(now, 1) - if !allowed { - l.globalOperationsInFlight.Add(-1) - return fmt.Errorf("global rate limit exceeded for GetChunks operations, try again later") - } - - clientInFlightCounter, ok := l.perClientOperationsInFlight[requesterID] + _, ok := l.perClientOperationsInFlight[requesterID] if !ok { // This is the first time we've seen this client ID. - - l.perClientOperationsInFlight[requesterID] = &atomic.Int64{} - clientInFlightCounter = l.perClientOperationsInFlight[requesterID] + l.perClientOperationsInFlight[requesterID] = 0 l.perClientOpLimiter[requesterID] = rate.NewLimiter( rate.Limit(l.config.MaxGetChunkOpsPerSecondClient), @@ -102,20 +93,24 @@ func (l *ChunkRateLimiter) BeginGetChunkOperation( l.config.GetChunkBytesBurstinessClient) } - countInFlight = clientInFlightCounter.Add(1) - if countInFlight > int64(l.config.MaxConcurrentGetChunkOpsClient) { - l.globalOperationsInFlight.Add(-1) - clientInFlightCounter.Add(-1) + if l.globalOperationsInFlight >= l.config.MaxConcurrentGetChunkOps { + return fmt.Errorf("global concurrent request limit exceeded for GetChunks operations, try again later") + } + if l.globalOpLimiter.TokensAt(now) < 1 { + return fmt.Errorf("global rate limit exceeded for GetChunks operations, try again later") + } + if l.perClientOperationsInFlight[requesterID] >= l.config.MaxConcurrentGetChunkOpsClient { return fmt.Errorf("client concurrent request limit exceeded for GetChunks") } - - allowed = l.perClientOpLimiter[requesterID].AllowN(now, 1) - if !allowed { - l.globalOperationsInFlight.Add(-1) - clientInFlightCounter.Add(-1) + if l.perClientOpLimiter[requesterID].TokensAt(now) < 1 { return fmt.Errorf("client rate limit exceeded for GetChunks, try again later") } + l.globalOperationsInFlight++ + l.perClientOperationsInFlight[requesterID]++ + l.globalOpLimiter.AllowN(now, 1) + l.perClientOpLimiter[requesterID].AllowN(now, 1) + return nil } @@ -125,8 +120,11 @@ func (l *ChunkRateLimiter) FinishGetChunkOperation(requesterID string) { return } - l.globalOperationsInFlight.Add(-1) - l.perClientOperationsInFlight[requesterID].Add(-1) + l.lock.Lock() + defer l.lock.Unlock() + + l.globalOperationsInFlight-- + l.perClientOperationsInFlight[requesterID]-- } // RequestGetChunkBandwidth should be called when a GetChunk is about to start downloading chunk data. 
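 // The requested byte count is debited against both the global and the per-client
 // bandwidth buckets; if either bucket cannot cover it, an error is returned and the
 // caller should abort the download.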
@@ -136,6 +134,8 @@ func (l *ChunkRateLimiter) RequestGetChunkBandwidth(now time.Time, requesterID s return nil } + // no lock needed here, as the bandwidth limiters themselves are thread-safe + allowed := l.globalBandwidthLimiter.AllowN(now, bytes) if !allowed { return fmt.Errorf("global rate limit exceeded for GetChunk bandwidth, try again later") diff --git a/relay/server.go b/relay/server.go index a42a4bfed7..bdc46228fd 100644 --- a/relay/server.go +++ b/relay/server.go @@ -150,6 +150,7 @@ func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.G if err != nil { return nil, err } + defer s.blobRateLimiter.FinishGetBlobOperation() key, err := v2.BytesToBlobKey(request.BlobKey) if err != nil { From f2c10e4949cf080a9be80f199bae71b265a3aa93 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 11:12:49 -0600 Subject: [PATCH 15/45] Made suggested changes. Signed-off-by: Cody Littley --- relay/metadata_provider.go | 3 +++ relay/server.go | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go index 92620099f5..3e32924072 100644 --- a/relay/metadata_provider.go +++ b/relay/metadata_provider.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/Layr-Labs/eigenda/core/v2" "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore" + "github.com/Layr-Labs/eigenda/encoding" "github.com/Layr-Labs/eigenda/relay/cache" "github.com/Layr-Labs/eigensdk-go/logging" "sync/atomic" @@ -155,8 +156,10 @@ func (m *metadataProvider) fetchMetadata(key v2.BlobKey) (*blobMetadata, error) } } + // TODO(cody-littley): blob size is not correct https://github.com/Layr-Labs/eigenda/pull/906#discussion_r1847396530 blobSize := uint32(cert.BlobHeader.BlobCommitments.Length) chunkSize, err := v2.GetChunkLength(cert.BlobHeader.BlobVersion, blobSize) + chunkSize *= encoding.BYTES_PER_SYMBOL if err != nil { return nil, fmt.Errorf("error getting chunk length: %w", err) } diff --git a/relay/server.go b/relay/server.go index bdc46228fd..ad6072b9fe 100644 --- a/relay/server.go +++ b/relay/server.go @@ -143,7 +143,7 @@ func NewServer( // GetBlob retrieves a blob stored by the relay. func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.GetBlobReply, error) { - // Future work: + // TODO(cody-littley): // - timeouts err := s.blobRateLimiter.BeginGetBlobOperation(time.Now()) @@ -188,7 +188,7 @@ func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.G // GetChunks retrieves chunks from blobs stored by the relay. func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (*pb.GetChunksReply, error) { - // Future work: + // TODO(cody-littley): // - authentication // - timeouts @@ -219,7 +219,10 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* "error fetching metadata for blob, check if blob exists and is assigned to this relay: %w", err) } - requiredBandwidth := computeChunkRequestRequiredBandwidth(request, mMap) + requiredBandwidth, err := computeChunkRequestRequiredBandwidth(request, mMap) + if err != nil { + return nil, fmt.Errorf("error computing required bandwidth: %w", err) + } err = s.chunkRateLimiter.RequestGetChunkBandwidth(time.Now(), clientID, requiredBandwidth) if err != nil { return nil, err @@ -324,26 +327,31 @@ func gatherChunkDataToSend( } // computeChunkRequestRequiredBandwidth computes the bandwidth required to fulfill a GetChunks request. 
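 // It returns an error for any requested key that is missing from the metadata map, so a
 // request for an unknown blob cannot bypass bandwidth accounting.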
-func computeChunkRequestRequiredBandwidth(request *pb.GetChunksRequest, mMap metadataMap) int { +func computeChunkRequestRequiredBandwidth(request *pb.GetChunksRequest, mMap metadataMap) (int, error) { requiredBandwidth := 0 for _, req := range request.ChunkRequests { var metadata *blobMetadata + var key v2.BlobKey var requestedChunks int if req.GetByIndex() != nil { - key := v2.BlobKey(req.GetByIndex().GetBlobKey()) + key = v2.BlobKey(req.GetByIndex().GetBlobKey()) metadata = mMap[key] requestedChunks = len(req.GetByIndex().ChunkIndices) } else { - key := v2.BlobKey(req.GetByRange().GetBlobKey()) + key = v2.BlobKey(req.GetByRange().GetBlobKey()) metadata = mMap[key] requestedChunks = int(req.GetByRange().EndIndex - req.GetByRange().StartIndex) } + if metadata == nil { + return 0, fmt.Errorf("metadata not found for key %s", key.Hex()) + } + requiredBandwidth += requestedChunks * int(metadata.chunkSizeBytes) } - return requiredBandwidth + return requiredBandwidth, nil } From c2bb9c33b05cb97b939bd2076edb445b8e885caa Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 11:21:54 -0600 Subject: [PATCH 16/45] Shorten package name. Signed-off-by: Cody Littley --- core/data.go | 2 +- disperser/apiserver/disperse_blob_v2.go | 2 +- disperser/apiserver/server_v2_test.go | 2 +- disperser/dataapi/docs/docs.go | 2 +- relay/{authentication => auth}/authenticator.go | 14 +++++++------- relay/{authentication => auth}/request_hashing.go | 2 +- .../request_hashing_test.go | 2 +- relay/limiter/chunk_rate_limiter.go | 4 ++-- relay/server.go | 10 +++++----- 9 files changed, 20 insertions(+), 20 deletions(-) rename relay/{authentication => auth}/authenticator.go (92%) rename relay/{authentication => auth}/request_hashing.go (98%) rename relay/{authentication => auth}/request_hashing_test.go (98%) diff --git a/core/data.go b/core/data.go index 61f6be0e77..f3c8a154b9 100644 --- a/core/data.go +++ b/core/data.go @@ -30,7 +30,7 @@ type SecurityParam struct { AdversaryThreshold uint8 // ConfirmationThreshold is the amount of stake that must sign a message for it to be considered valid as a percentage of the total stake in the quorum ConfirmationThreshold uint8 - // Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup authentication. This is used + // Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup auth. This is used // for restricting the rate at which retrievers are able to download data from the DA node to a multiple of the rate at which the // data was posted to the DA node. 
QuorumRate common.RateParam diff --git a/disperser/apiserver/disperse_blob_v2.go b/disperser/apiserver/disperse_blob_v2.go index d39d75f2cd..10e3ca5a9d 100644 --- a/disperser/apiserver/disperse_blob_v2.go +++ b/disperser/apiserver/disperse_blob_v2.go @@ -110,7 +110,7 @@ func (s *DispersalServerV2) validateDispersalRequest(req *pb.DisperseBlobRequest return api.NewErrorInvalidArg(fmt.Sprintf("invalid blob header: %s", err.Error())) } if err = s.authenticator.AuthenticateBlobRequest(blobHeader); err != nil { - return api.NewErrorInvalidArg(fmt.Sprintf("authentication failed: %s", err.Error())) + return api.NewErrorInvalidArg(fmt.Sprintf("auth failed: %s", err.Error())) } // TODO(ian-shim): validate commitment, length is power of 2 and less than maxNumSymbolsPerBlob, payment metadata diff --git a/disperser/apiserver/server_v2_test.go b/disperser/apiserver/server_v2_test.go index 0c2697366f..495e40acd8 100644 --- a/disperser/apiserver/server_v2_test.go +++ b/disperser/apiserver/server_v2_test.go @@ -200,7 +200,7 @@ func TestV2DisperseBlobRequestValidation(t *testing.T) { Data: data, BlobHeader: invalidReqProto, }) - assert.ErrorContains(t, err, "authentication failed") + assert.ErrorContains(t, err, "auth failed") } func TestV2GetBlobStatus(t *testing.T) { diff --git a/disperser/dataapi/docs/docs.go b/disperser/dataapi/docs/docs.go index 6c8a7c4874..9cf7dff3b1 100644 --- a/disperser/dataapi/docs/docs.go +++ b/disperser/dataapi/docs/docs.go @@ -765,7 +765,7 @@ const docTemplate = `{ "type": "integer" }, "quorumRate": { - "description": "Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup authentication. This is used\nfor restricting the rate at which retrievers are able to download data from the DA node to a multiple of the rate at which the\ndata was posted to the DA node.", + "description": "Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup auth. This is used\nfor restricting the rate at which retrievers are able to download data from the DA node to a multiple of the rate at which the\ndata was posted to the DA node.", "type": "integer" } } diff --git a/relay/authentication/authenticator.go b/relay/auth/authenticator.go similarity index 92% rename from relay/authentication/authenticator.go rename to relay/auth/authenticator.go index a7622bb24a..ee6f67e8b2 100644 --- a/relay/authentication/authenticator.go +++ b/relay/auth/authenticator.go @@ -1,4 +1,4 @@ -package authentication +package auth import ( "context" @@ -13,7 +13,7 @@ import ( // RequestAuthenticator authenticates requests to the relay service. This object is thread safe. type RequestAuthenticator interface { // AuthenticateGetChunksRequest authenticates a GetChunksRequest, returning an error if the request is invalid. - // The address is the address of the peer that sent the request. This may be used to cache authentication results + // The address is the address of the peer that sent the request. This may be used to cache auth results // in order to save server resources. AuthenticateGetChunksRequest( address string, @@ -21,7 +21,7 @@ type RequestAuthenticator interface { now time.Time) error } -// authenticationTimeout is used to track the expiration of an authentication. +// authenticationTimeout is used to track the expiration of an auth. 
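+// These records are appended to requestAuthenticator.authenticationTimeouts in
+// chronological order, so (assuming callers pass non-decreasing timestamps) expired
+// entries can be pruned with a single scan from the front.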
type authenticationTimeout struct { clientID string expiration time.Time @@ -38,8 +38,8 @@ type requestAuthenticator struct { // authenticationTimeouts is a list of authentications that have been performed, along with their expiration times. authenticationTimeouts []*authenticationTimeout - // authenticationTimeoutDuration is the duration for which an authentication is valid. - // If this is zero, then authentication saving is disabled, and each request will be authenticated independently. + // authenticationTimeoutDuration is the duration for which an auth is valid. + // If this is zero, then auth saving is disabled, and each request will be authenticated independently. authenticationTimeoutDuration time.Duration // savedAuthLock is used for thread safe atomic modification of the authenticatedClients map and the @@ -66,7 +66,7 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( now time.Time) error { if a == nil { - // do not enforce authentication if the authenticator is nil + // do not enforce auth if the authenticator is nil return nil } @@ -111,7 +111,7 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( return nil } -// saveAuthenticationResult saves the result of an authentication. +// saveAuthenticationResult saves the result of an auth. func (a *requestAuthenticator) saveAuthenticationResult(now time.Time, address string) { if a.authenticationTimeoutDuration == 0 { // Authentication saving is disabled. diff --git a/relay/authentication/request_hashing.go b/relay/auth/request_hashing.go similarity index 98% rename from relay/authentication/request_hashing.go rename to relay/auth/request_hashing.go index 37ac9a6b2f..3d49f566d1 100644 --- a/relay/authentication/request_hashing.go +++ b/relay/auth/request_hashing.go @@ -1,4 +1,4 @@ -package authentication +package auth import ( "encoding/binary" diff --git a/relay/authentication/request_hashing_test.go b/relay/auth/request_hashing_test.go similarity index 98% rename from relay/authentication/request_hashing_test.go rename to relay/auth/request_hashing_test.go index 7dcd480dc9..ded7180a1a 100644 --- a/relay/authentication/request_hashing_test.go +++ b/relay/auth/request_hashing_test.go @@ -1,4 +1,4 @@ -package authentication +package auth import ( pb "github.com/Layr-Labs/eigenda/api/grpc/relay" diff --git a/relay/limiter/chunk_rate_limiter.go b/relay/limiter/chunk_rate_limiter.go index fe899e5b17..0db62648cd 100644 --- a/relay/limiter/chunk_rate_limiter.go +++ b/relay/limiter/chunk_rate_limiter.go @@ -27,8 +27,8 @@ type ChunkRateLimiter struct { // per-client limiters // Note: in its current form, these expose a DOS vector, since an attacker can create many clients IDs - // and force these maps to become arbitrarily large. This will be remedied when authentication - // is implemented, as only authentication will happen prior to rate limiting. + // and force these maps to become arbitrarily large. This will be remedied when auth + // is implemented, as only auth will happen prior to rate limiting. 
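+	// (Once authentication runs before this limiter, unauthenticated peers can no
+	// longer mint fresh client IDs to grow these maps without bound.)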
// perClientOpLimiter enforces per-client rate limits on the maximum rate of GetChunk operations perClientOpLimiter map[string]*rate.Limiter diff --git a/relay/server.go b/relay/server.go index 926e91b0b7..86864115be 100644 --- a/relay/server.go +++ b/relay/server.go @@ -10,7 +10,7 @@ import ( v2 "github.com/Layr-Labs/eigenda/core/v2" "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore" "github.com/Layr-Labs/eigenda/encoding" - "github.com/Layr-Labs/eigenda/relay/authentication" + "github.com/Layr-Labs/eigenda/relay/auth" "github.com/Layr-Labs/eigenda/relay/chunkstore" "github.com/Layr-Labs/eigenda/relay/limiter" "github.com/Layr-Labs/eigensdk-go/logging" @@ -52,7 +52,7 @@ type Server struct { grpcServer *grpc.Server // authenticator is used to authenticate requests to the relay service. - authenticator authentication.RequestAuthenticator // TODO set this + authenticator auth.RequestAuthenticator // TODO set this } type Config struct { @@ -135,7 +135,7 @@ func NewServer( } // TODO - authenticator := authentication.NewRequestAuthenticator(nil, 0) + authenticator := auth.NewRequestAuthenticator(nil, 0) return &Server{ config: config, @@ -198,7 +198,7 @@ func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.G func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (*pb.GetChunksReply, error) { // TODO(cody-littley): - // - authentication + // - auth // - timeouts if len(request.ChunkRequests) <= 0 { @@ -217,7 +217,7 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* err := s.authenticator.AuthenticateGetChunksRequest(clientAddress, request, time.Now()) if err != nil { - return nil, fmt.Errorf("authentication failed: %w", err) + return nil, fmt.Errorf("auth failed: %w", err) } // TODO make methods take correct type clientID := fmt.Sprintf("%x", request.RequesterId) From 151305b7280eadfbb01e85b8f6a58449ed29a277 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 12:05:39 -0600 Subject: [PATCH 17/45] Started testing Signed-off-by: Cody Littley --- relay/auth/authenticator_test.go | 71 ++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 relay/auth/authenticator_test.go diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go new file mode 100644 index 0000000000..51c7583a50 --- /dev/null +++ b/relay/auth/authenticator_test.go @@ -0,0 +1,71 @@ +package auth + +import ( + "context" + pb "github.com/Layr-Labs/eigenda/api/grpc/relay" + tu "github.com/Layr-Labs/eigenda/common/testutils" + "github.com/Layr-Labs/eigenda/core" + "github.com/Layr-Labs/eigenda/core/mock" + "github.com/stretchr/testify/require" + "testing" + "time" +) + +// TestMockSigning is a meta-test to verify that +// the test framework's BLS keys are functioning correctly. 
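+// If this test fails, the authenticator tests below are not meaningful, since they all
+// rely on mock.ChainDataMock key pairs to produce verifiable signatures.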
+func TestMockSigning(t *testing.T) { + tu.InitializeRandom() + + operatorID := mock.MakeOperatorId(0) + stakes := map[core.QuorumID]map[core.OperatorID]int{ + core.QuorumID(0): { + operatorID: 1, + }, + } + ics, err := mock.NewChainDataMock(stakes) + require.NoError(t, err) + + operators, err := ics.GetIndexedOperators(context.Background(), 0) + require.NoError(t, err) + + operator, ok := operators[operatorID] + require.True(t, ok) + + bytesToSign := tu.RandomBytes(32) + signature := ics.KeyPairs[operatorID].SignMessage([32]byte(bytesToSign)) + + isValid := signature.Verify(operator.PubkeyG2, [32]byte(bytesToSign)) + require.True(t, isValid) + + // Changing a byte in the message should invalidate the signature + bytesToSign[0] = bytesToSign[0] ^ 1 + + isValid = signature.Verify(operator.PubkeyG2, [32]byte(bytesToSign)) + require.False(t, isValid) +} + +func TestNonExistingClient(t *testing.T) { + tu.InitializeRandom() + + operatorID := mock.MakeOperatorId(0) + stakes := map[core.QuorumID]map[core.OperatorID]int{ + core.QuorumID(0): { + operatorID: 1, + }, + } + ics, err := mock.NewChainDataMock(stakes) + require.NoError(t, err) + + timeout := 10 * time.Second + + authenticator := NewRequestAuthenticator(ics, timeout) + + invalidOperatorID := tu.RandomBytes(32) + + err = authenticator.AuthenticateGetChunksRequest( + string(invalidOperatorID), + &pb.GetChunksRequest{ + RequesterId: invalidOperatorID, + }, + time.Now()) +} From 29fd940f3ebe3e51be2a90c88b8a5b5ec801c250 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 12:24:39 -0600 Subject: [PATCH 18/45] Finished unit tests. Signed-off-by: Cody Littley --- relay/auth/authenticator_test.go | 127 +++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 5 deletions(-) diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go index 51c7583a50..abd14d357d 100644 --- a/relay/auth/authenticator_test.go +++ b/relay/auth/authenticator_test.go @@ -2,7 +2,6 @@ package auth import ( "context" - pb "github.com/Layr-Labs/eigenda/api/grpc/relay" tu "github.com/Layr-Labs/eigenda/common/testutils" "github.com/Layr-Labs/eigenda/core" "github.com/Layr-Labs/eigenda/core/mock" @@ -44,6 +43,68 @@ func TestMockSigning(t *testing.T) { require.False(t, isValid) } +func TestValidRequest(t *testing.T) { + tu.InitializeRandom() + + operatorID := mock.MakeOperatorId(0) + stakes := map[core.QuorumID]map[core.OperatorID]int{ + core.QuorumID(0): { + operatorID: 1, + }, + } + ics, err := mock.NewChainDataMock(stakes) + require.NoError(t, err) + + timeout := 10 * time.Second + + authenticator := NewRequestAuthenticator(ics, timeout) + + request := randomGetChunksRequest() + request.RequesterId = operatorID[:] + hash := HashGetChunksRequest(request) + signature := ics.KeyPairs[operatorID].SignMessage([32]byte(hash)) + request.RequesterSignature = signature.G1Point.Serialize() + + now := time.Now() + + ics.Mock.On("GetCurrentBlockNumber").Return(uint(0), nil) + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + request, + now) + require.NoError(t, err) + + // Making additional requests before timeout elapses should not trigger authentication for the address "foobar". + // To probe at this, intentionally make a request that would be considered invalid if it were authenticated. 
+ invalidRequest := randomGetChunksRequest() + invalidRequest.RequesterId = operatorID[:] + invalidRequest.RequesterSignature = signature.G1Point.Serialize() // the previous signature is invalid here + + start := now + for now.Before(start.Add(timeout)) { + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + invalidRequest, + now) + require.NoError(t, err) + + err = authenticator.AuthenticateGetChunksRequest( + "baz", + invalidRequest, + now) + require.Error(t, err) + + now = now.Add(time.Second) + } + + // After the timeout elapses, new requests should trigger authentication. + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + invalidRequest, + now) + require.Error(t, err) +} + func TestNonExistingClient(t *testing.T) { tu.InitializeRandom() @@ -62,10 +123,66 @@ func TestNonExistingClient(t *testing.T) { invalidOperatorID := tu.RandomBytes(32) + request := randomGetChunksRequest() + request.RequesterId = invalidOperatorID + + ics.Mock.On("GetCurrentBlockNumber").Return(uint(0), nil) err = authenticator.AuthenticateGetChunksRequest( - string(invalidOperatorID), - &pb.GetChunksRequest{ - RequesterId: invalidOperatorID, - }, + "foobar", + request, time.Now()) + require.Error(t, err) +} + +func TestBadSignature(t *testing.T) { + tu.InitializeRandom() + + operatorID := mock.MakeOperatorId(0) + stakes := map[core.QuorumID]map[core.OperatorID]int{ + core.QuorumID(0): { + operatorID: 1, + }, + } + ics, err := mock.NewChainDataMock(stakes) + require.NoError(t, err) + + timeout := 10 * time.Second + + authenticator := NewRequestAuthenticator(ics, timeout) + + request := randomGetChunksRequest() + request.RequesterId = operatorID[:] + hash := HashGetChunksRequest(request) + signature := ics.KeyPairs[operatorID].SignMessage([32]byte(hash)) + request.RequesterSignature = signature.G1Point.Serialize() + + now := time.Now() + + ics.Mock.On("GetCurrentBlockNumber").Return(uint(0), nil) + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + request, + now) + require.NoError(t, err) + + // move time forward to wipe out previous authentication + now = now.Add(timeout) + + // Change a byte in the signature to make it invalid + request.RequesterSignature[0] = request.RequesterSignature[0] ^ 1 + + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + request, + now) + require.Error(t, err) + + // Sign different data with the same key. + signature = ics.KeyPairs[operatorID].SignMessage([32]byte(tu.RandomBytes(32))) + request.RequesterSignature = signature.G1Point.Serialize() + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + request, + now) + require.Error(t, err) } From 3993af4826c5384ab56a9bda612775c47f910b0c Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 12:26:24 -0600 Subject: [PATCH 19/45] Nil authenticator test. 
Signed-off-by: Cody Littley --- relay/auth/authenticator_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go index abd14d357d..d448bbce1d 100644 --- a/relay/auth/authenticator_test.go +++ b/relay/auth/authenticator_test.go @@ -186,3 +186,14 @@ func TestBadSignature(t *testing.T) { now) require.Error(t, err) } + +func TestNilAuthenticator(t *testing.T) { + var authenticator *requestAuthenticator = nil + + request := randomGetChunksRequest() + err := authenticator.AuthenticateGetChunksRequest( + "foobar", + request, + time.Now()) + require.NoError(t, err) +} From d094b80184c80b0e440d1d8567ac1f17e83ad3fa Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 12:28:32 -0600 Subject: [PATCH 20/45] Test with authentication saving disabled. Signed-off-by: Cody Littley --- relay/auth/authenticator_test.go | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go index d448bbce1d..9838482ae7 100644 --- a/relay/auth/authenticator_test.go +++ b/relay/auth/authenticator_test.go @@ -105,6 +105,51 @@ func TestValidRequest(t *testing.T) { require.Error(t, err) } +func TestAuthenticationSavingDisabled(t *testing.T) { + tu.InitializeRandom() + + operatorID := mock.MakeOperatorId(0) + stakes := map[core.QuorumID]map[core.OperatorID]int{ + core.QuorumID(0): { + operatorID: 1, + }, + } + ics, err := mock.NewChainDataMock(stakes) + require.NoError(t, err) + + // This disables saving of authentication results. + timeout := time.Duration(0) + + authenticator := NewRequestAuthenticator(ics, timeout) + + request := randomGetChunksRequest() + request.RequesterId = operatorID[:] + hash := HashGetChunksRequest(request) + signature := ics.KeyPairs[operatorID].SignMessage([32]byte(hash)) + request.RequesterSignature = signature.G1Point.Serialize() + + now := time.Now() + + ics.Mock.On("GetCurrentBlockNumber").Return(uint(0), nil) + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + request, + now) + require.NoError(t, err) + + // There is no authentication timeout, so a new request should trigger authentication. + // To probe at this, intentionally make a request that would be considered invalid if it were authenticated. + invalidRequest := randomGetChunksRequest() + invalidRequest.RequesterId = operatorID[:] + invalidRequest.RequesterSignature = signature.G1Point.Serialize() // the previous signature is invalid here + + err = authenticator.AuthenticateGetChunksRequest( + "foobar", + invalidRequest, + now) + require.Error(t, err) +} + func TestNonExistingClient(t *testing.T) { tu.InitializeRandom() From d8691e1bc1ad47b9d49d49c6de9783de0e954d2a Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 13:03:44 -0600 Subject: [PATCH 21/45] Tie together config. 
Signed-off-by: Cody Littley --- ...{request_hashing.go => request_signing.go} | 7 +++ ...ashing_test.go => request_signing_test.go} | 0 relay/cmd/config.go | 13 +++++ relay/cmd/flags/flags.go | 40 ++++++++++++++ relay/cmd/main.go | 54 ++++++++++++++++++- relay/server.go | 22 +++++--- relay/server_test.go | 18 +++++-- 7 files changed, 142 insertions(+), 12 deletions(-) rename relay/auth/{request_hashing.go => request_signing.go} (81%) rename relay/auth/{request_hashing_test.go => request_signing_test.go} (100%) diff --git a/relay/auth/request_hashing.go b/relay/auth/request_signing.go similarity index 81% rename from relay/auth/request_hashing.go rename to relay/auth/request_signing.go index 3d49f566d1..72e49c53e5 100644 --- a/relay/auth/request_hashing.go +++ b/relay/auth/request_signing.go @@ -40,3 +40,10 @@ func HashGetChunksRequest(request *pb.GetChunksRequest) []byte { return hasher.Sum(nil) } + +// SignGetChunksRequest signs the given GetChunksRequest with the given private key. +func SignGetChunksRequest(request *pb.GetChunksRequest, privateKey []byte) ([]byte, error) { + //hash := HashGetChunksRequest(request) + // TODO implement this + return nil, nil +} diff --git a/relay/auth/request_hashing_test.go b/relay/auth/request_signing_test.go similarity index 100% rename from relay/auth/request_hashing_test.go rename to relay/auth/request_signing_test.go diff --git a/relay/cmd/config.go b/relay/cmd/config.go index bb7566f5a1..5427985a04 100644 --- a/relay/cmd/config.go +++ b/relay/cmd/config.go @@ -2,7 +2,9 @@ package main import ( "fmt" + "github.com/Layr-Labs/eigenda/common/geth" "github.com/Layr-Labs/eigenda/relay/limiter" + "time" "github.com/Layr-Labs/eigenda/common" "github.com/Layr-Labs/eigenda/common/aws" @@ -29,6 +31,11 @@ type Config struct { // RelayConfig is the configuration for the relay. 
RelayConfig relay.Config + + EthClientConfig geth.EthClientConfig + IndexerPullInterval time.Duration + BLSOperatorStateRetrieverAddr string + EigenDAServiceManagerAddr string } func NewConfig(ctx *cli.Context) (Config, error) { @@ -73,7 +80,13 @@ func NewConfig(ctx *cli.Context) (Config, error) { GetChunkBytesBurstinessClient: ctx.Int(flags.GetChunkBytesBurstinessClientFlag.Name), MaxConcurrentGetChunkOpsClient: ctx.Int(flags.MaxConcurrentGetChunkOpsClientFlag.Name), }, + AuthenticationTimeout: ctx.Duration(flags.AuthenticationTimeoutFlag.Name), + AuthenticationDisabled: ctx.Bool(flags.AuthenticationDisabledFlag.Name), }, + EthClientConfig: geth.ReadEthClientConfig(ctx), + IndexerPullInterval: ctx.Duration(flags.IndexerPullIntervalFlag.Name), + BLSOperatorStateRetrieverAddr: ctx.String(flags.BlsOperatorStateRetrieverAddrFlag.Name), + EigenDAServiceManagerAddr: ctx.String(flags.EigenDAServiceManagerAddrFlag.Name), } for i, id := range relayIDs { config.RelayConfig.RelayIDs[i] = core.RelayKey(id) diff --git a/relay/cmd/flags/flags.go b/relay/cmd/flags/flags.go index 9abd673566..f5bbce330d 100644 --- a/relay/cmd/flags/flags.go +++ b/relay/cmd/flags/flags.go @@ -3,7 +3,9 @@ package flags import ( "github.com/Layr-Labs/eigenda/common" "github.com/Layr-Labs/eigenda/common/aws" + "github.com/Layr-Labs/eigenda/common/geth" "github.com/urfave/cli" + "time" ) const ( @@ -189,6 +191,38 @@ var ( EnvVar: common.PrefixEnvVar(envVarPrefix, "MAX_CONCURRENT_GET_CHUNK_OPS_CLIENT"), Value: 1, } + BlsOperatorStateRetrieverAddrFlag = cli.StringFlag{ + Name: common.PrefixFlag(FlagPrefix, "bls-operator-state-retriever-addr"), + Usage: "Address of the BLS operator state retriever", + Required: true, + EnvVar: common.PrefixEnvVar(envVarPrefix, "BLS_OPERATOR_STATE_RETRIEVER_ADDR"), + } + EigenDAServiceManagerAddrFlag = cli.StringFlag{ + Name: common.PrefixFlag(FlagPrefix, "eigen-da-service-manager-addr"), + Usage: "Address of the Eigen DA service manager", + Required: true, + EnvVar: common.PrefixEnvVar(envVarPrefix, "EIGEN_DA_SERVICE_MANAGER_ADDR"), + } + IndexerPullIntervalFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "indexer-pull-interval"), + Usage: "Interval to pull from the indexer", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "INDEXER_PULL_INTERVAL"), + Value: 5 * time.Minute, + } + AuthenticationTimeoutFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "authentication-timeout"), + Usage: "Duration to keep authentication results", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "AUTHENTICATION_TIMEOUT"), + Value: 5 * time.Minute, + } + AuthenticationDisabledFlag = cli.BoolFlag{ + Name: common.PrefixFlag(FlagPrefix, "authentication-disabled"), + Usage: "Disable GetChunks() authentication", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "AUTHENTICATION_DISABLED"), + } ) var requiredFlags = []cli.Flag{ @@ -196,6 +230,10 @@ var requiredFlags = []cli.Flag{ BucketNameFlag, MetadataTableNameFlag, RelayIDsFlag, + BlsOperatorStateRetrieverAddrFlag, + EigenDAServiceManagerAddrFlag, + AuthenticationTimeoutFlag, + AuthenticationDisabledFlag, } var optionalFlags = []cli.Flag{ @@ -221,6 +259,7 @@ var optionalFlags = []cli.Flag{ MaxGetChunkBytesPerSecondClientFlag, GetChunkBytesBurstinessClientFlag, MaxConcurrentGetChunkOpsClientFlag, + IndexerPullIntervalFlag, } var Flags []cli.Flag @@ -229,4 +268,5 @@ func init() { Flags = append(requiredFlags, optionalFlags...) 
Flags = append(Flags, common.LoggerCLIFlags(envVarPrefix, FlagPrefix)...) Flags = append(Flags, aws.ClientFlags(envVarPrefix, FlagPrefix)...) + Flags = append(Flags, geth.EthClientFlags(envVarPrefix)...) } diff --git a/relay/cmd/main.go b/relay/cmd/main.go index 2730ccef21..cc4ab6c3d8 100644 --- a/relay/cmd/main.go +++ b/relay/cmd/main.go @@ -3,6 +3,14 @@ package main import ( "context" "fmt" + "github.com/Layr-Labs/eigenda/common/geth" + "github.com/Layr-Labs/eigenda/core" + coreeth "github.com/Layr-Labs/eigenda/core/eth" + coreindexer "github.com/Layr-Labs/eigenda/core/indexer" + "github.com/Layr-Labs/eigenda/indexer" + "github.com/Layr-Labs/eigensdk-go/logging" + gethcommon "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rpc" "log" "os" @@ -64,6 +72,10 @@ func RunRelay(ctx *cli.Context) error { metadataStore := blobstore.NewBlobMetadataStore(dynamoClient, logger, config.MetadataTableName) blobStore := blobstore.NewBlobStore(config.BucketName, s3Client, logger) chunkReader := chunkstore.NewChunkReader(logger, s3Client, config.BucketName) + ics, err := buildICS(logger, &config) + if err != nil { + return fmt.Errorf("failed to build ics: %w", err) + } server, err := relay.NewServer( context.Background(), @@ -71,7 +83,8 @@ func RunRelay(ctx *cli.Context) error { &config.RelayConfig, metadataStore, blobStore, - chunkReader) + chunkReader, + ics) if err != nil { return fmt.Errorf("failed to create relay server: %w", err) } @@ -83,3 +96,42 @@ func RunRelay(ctx *cli.Context) error { return nil } + +func buildICS(logger logging.Logger, config *Config) (core.IndexedChainState, error) { + rpcClient, err := rpc.Dial(config.EthClientConfig.RPCURLs[0]) + if err != nil { + return nil, fmt.Errorf("failed to create rpc client: %w", err) + } + + client, err := geth.NewMultiHomingClient(config.EthClientConfig, gethcommon.Address{}, logger) + if err != nil { + logger.Error("Cannot create chain.Client", "err", err) + return nil, err + } + + tx, err := coreeth.NewWriter(logger, client, config.BLSOperatorStateRetrieverAddr, config.EigenDAServiceManagerAddr) + if err != nil { + return nil, fmt.Errorf("failed to create eth writer: %w", err) + } + + idx, err := coreindexer.CreateNewIndexer( + &indexer.Config{ + PullInterval: config.IndexerPullInterval, + }, + client, + rpcClient, + config.EigenDAServiceManagerAddr, + logger, + ) + if err != nil { + return nil, fmt.Errorf("failed to create indexer: %w", err) + } + + cs := coreeth.NewChainState(tx, client) + ics, err := coreindexer.NewIndexedChainState(cs, idx) + if err != nil { + return nil, fmt.Errorf("failed to create indexed chain state: %w", err) + } + + return ics, nil +} diff --git a/relay/server.go b/relay/server.go index 86864115be..bdc8b42a14 100644 --- a/relay/server.go +++ b/relay/server.go @@ -52,7 +52,7 @@ type Server struct { grpcServer *grpc.Server // authenticator is used to authenticate requests to the relay service. - authenticator auth.RequestAuthenticator // TODO set this + authenticator auth.RequestAuthenticator } type Config struct { @@ -92,6 +92,14 @@ type Config struct { // RateLimits contains configuration for rate limiting. RateLimits limiter.Config + + // AuthenticationTimeout is the duration for which an authentication is "cached". A request from the same client + // within this duration will not trigger a new authentication in order to save resources. If zero, then each request + // will be authenticated independently, regardless of timing. 
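The AuthenticationTimeout semantics above reduce to a simple expiry check: a client authenticated at some instant stays trusted until that instant plus the timeout, and a zero timeout disables saving outright. A minimal standalone sketch of that check, with a hypothetical function name (the real bookkeeping lives in relay/auth/authenticator.go):

    package auth

    import "time"

    // authStillValid reports whether an authentication saved at savedAt is
    // still trusted at time now. A zero timeout disables saving, so every
    // request authenticates from scratch.
    func authStillValid(savedAt time.Time, now time.Time, timeout time.Duration) bool {
        if timeout == 0 {
            return false
        }
        return now.Before(savedAt.Add(timeout))
    }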
+ AuthenticationTimeout time.Duration + + // AuthenticationDisabled will disable authentication if set to true. + AuthenticationDisabled bool } // NewServer creates a new relay Server. @@ -101,7 +109,8 @@ func NewServer( config *Config, metadataStore *blobstore.BlobMetadataStore, blobStore *blobstore.BlobStore, - chunkReader chunkstore.ChunkReader) (*Server, error) { + chunkReader chunkstore.ChunkReader, + ics core.IndexedChainState) (*Server, error) { mp, err := newMetadataProvider( ctx, @@ -134,8 +143,10 @@ func NewServer( return nil, fmt.Errorf("error creating chunk provider: %w", err) } - // TODO - authenticator := auth.NewRequestAuthenticator(nil, 0) + var authenticator auth.RequestAuthenticator + if !config.AuthenticationDisabled { + authenticator = auth.NewRequestAuthenticator(ics, config.AuthenticationTimeout) + } return &Server{ config: config, @@ -219,9 +230,8 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* if err != nil { return nil, fmt.Errorf("auth failed: %w", err) } - // TODO make methods take correct type - clientID := fmt.Sprintf("%x", request.RequesterId) + clientID := string(request.RequesterId) err = s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID) if err != nil { return nil, err diff --git a/relay/server_test.go b/relay/server_test.go index cedfa6ddb4..96d465a788 100644 --- a/relay/server_test.go +++ b/relay/server_test.go @@ -45,6 +45,7 @@ func defaultConfig() *Config { GetChunkBytesBurstinessClient: 2 * 1024 * 1024, MaxConcurrentGetChunkOpsClient: 1, }, + AuthenticationDisabled: true, } } @@ -101,6 +102,7 @@ func TestReadWriteBlobs(t *testing.T) { config, metadataStore, blobStore, + nil, /* not used in this test*/ nil /* not used in this test*/) require.NoError(t, err) @@ -178,7 +180,8 @@ func TestReadNonExistentBlob(t *testing.T) { config, metadataStore, blobStore, - nil /* not used in this test */) + nil, /* not used in this test */ + nil /* not used in this test*/) require.NoError(t, err) go func() { @@ -230,6 +233,7 @@ func TestReadWriteBlobsWithSharding(t *testing.T) { config, metadataStore, blobStore, + nil, /* not used in this test*/ nil /* not used in this test*/) require.NoError(t, err) @@ -347,7 +351,8 @@ func TestReadWriteChunks(t *testing.T) { config, metadataStore, nil, /* not used in this test*/ - chunkReader) + chunkReader, + nil /* not used in this test*/) require.NoError(t, err) go func() { @@ -542,7 +547,8 @@ func TestBatchedReadWriteChunks(t *testing.T) { config, metadataStore, nil, /* not used in this test */ - chunkReader) + chunkReader, + nil /* not used in this test*/) require.NoError(t, err) go func() { @@ -667,7 +673,8 @@ func TestReadWriteChunksWithSharding(t *testing.T) { config, metadataStore, nil, /* not used in this test*/ - chunkReader) + chunkReader, + nil /* not used in this test*/) require.NoError(t, err) go func() { @@ -941,7 +948,8 @@ func TestBatchedReadWriteChunksWithSharding(t *testing.T) { config, metadataStore, nil, /* not used in this test */ - chunkReader) + chunkReader, + nil /* not used in this test*/) require.NoError(t, err) go func() { From 8b9512b97219539ed0a08c9730e68c7ee15b8b15 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 19 Nov 2024 13:13:04 -0600 Subject: [PATCH 22/45] Add method for convenient signing. 
Signed-off-by: Cody Littley --- relay/auth/authenticator_test.go | 27 +++++++-------------------- relay/auth/request_signing.go | 9 +++++---- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go index 9838482ae7..218ca8d5d9 100644 --- a/relay/auth/authenticator_test.go +++ b/relay/auth/authenticator_test.go @@ -61,9 +61,8 @@ func TestValidRequest(t *testing.T) { request := randomGetChunksRequest() request.RequesterId = operatorID[:] - hash := HashGetChunksRequest(request) - signature := ics.KeyPairs[operatorID].SignMessage([32]byte(hash)) - request.RequesterSignature = signature.G1Point.Serialize() + SignGetChunksRequest(ics.KeyPairs[operatorID], request) + signature := request.RequesterSignature now := time.Now() @@ -78,7 +77,7 @@ func TestValidRequest(t *testing.T) { // To probe at this, intentionally make a request that would be considered invalid if it were authenticated. invalidRequest := randomGetChunksRequest() invalidRequest.RequesterId = operatorID[:] - invalidRequest.RequesterSignature = signature.G1Point.Serialize() // the previous signature is invalid here + invalidRequest.RequesterSignature = signature // the previous signature is invalid here start := now for now.Before(start.Add(timeout)) { @@ -124,9 +123,8 @@ func TestAuthenticationSavingDisabled(t *testing.T) { request := randomGetChunksRequest() request.RequesterId = operatorID[:] - hash := HashGetChunksRequest(request) - signature := ics.KeyPairs[operatorID].SignMessage([32]byte(hash)) - request.RequesterSignature = signature.G1Point.Serialize() + SignGetChunksRequest(ics.KeyPairs[operatorID], request) + signature := request.RequesterSignature now := time.Now() @@ -141,7 +139,7 @@ func TestAuthenticationSavingDisabled(t *testing.T) { // To probe at this, intentionally make a request that would be considered invalid if it were authenticated. invalidRequest := randomGetChunksRequest() invalidRequest.RequesterId = operatorID[:] - invalidRequest.RequesterSignature = signature.G1Point.Serialize() // the previous signature is invalid here + invalidRequest.RequesterSignature = signature // the previous signature is invalid here err = authenticator.AuthenticateGetChunksRequest( "foobar", @@ -197,9 +195,7 @@ func TestBadSignature(t *testing.T) { request := randomGetChunksRequest() request.RequesterId = operatorID[:] - hash := HashGetChunksRequest(request) - signature := ics.KeyPairs[operatorID].SignMessage([32]byte(hash)) - request.RequesterSignature = signature.G1Point.Serialize() + SignGetChunksRequest(ics.KeyPairs[operatorID], request) now := time.Now() @@ -221,15 +217,6 @@ func TestBadSignature(t *testing.T) { request, now) require.Error(t, err) - - // Sign different data with the same key. 
-	signature = ics.KeyPairs[operatorID].SignMessage([32]byte(tu.RandomBytes(32)))
-	request.RequesterSignature = signature.G1Point.Serialize()
-	err = authenticator.AuthenticateGetChunksRequest(
-		"foobar",
-		request,
-		now)
-	require.Error(t, err)
 }
 
 func TestNilAuthenticator(t *testing.T) {
diff --git a/relay/auth/request_signing.go b/relay/auth/request_signing.go
index 72e49c53e5..8170951f48 100644
--- a/relay/auth/request_signing.go
+++ b/relay/auth/request_signing.go
@@ -3,6 +3,7 @@ package auth
 import (
 	"encoding/binary"
 	pb "github.com/Layr-Labs/eigenda/api/grpc/relay"
+	"github.com/Layr-Labs/eigenda/core"
 	"golang.org/x/crypto/sha3"
 )
 
@@ -42,8 +43,8 @@ func HashGetChunksRequest(request *pb.GetChunksRequest) []byte {
 }
 
 // SignGetChunksRequest signs the given GetChunksRequest with the given private key.
-func SignGetChunksRequest(request *pb.GetChunksRequest, privateKey []byte) ([]byte, error) {
-	//hash := HashGetChunksRequest(request)
-	// TODO implement this
-	return nil, nil
+func SignGetChunksRequest(keys *core.KeyPair, request *pb.GetChunksRequest) {
+	hash := HashGetChunksRequest(request)
+	signature := keys.SignMessage(([32]byte)(hash))
+	request.RequesterSignature = signature.G1Point.Serialize()
 }

From fb7ec51c77b5057a181c479f078c744ce02cc306 Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Tue, 19 Nov 2024 14:45:36 -0600
Subject: [PATCH 23/45] Made requested changes.

Signed-off-by: Cody Littley
---
 relay/limiter/blob_rate_limiter.go  |  9 ++++++---
 relay/limiter/chunk_rate_limiter.go | 25 ++++++++++++++++++-------
 relay/metadata_provider.go          |  2 +-
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/relay/limiter/blob_rate_limiter.go b/relay/limiter/blob_rate_limiter.go
index 0ac260cba8..1131af863b 100644
--- a/relay/limiter/blob_rate_limiter.go
+++ b/relay/limiter/blob_rate_limiter.go
@@ -57,10 +57,12 @@ func (l *BlobRateLimiter) BeginGetBlobOperation(now time.Time) error {
 	defer l.lock.Unlock()
 
 	if l.operationsInFlight >= l.config.MaxConcurrentGetBlobOps {
-		return fmt.Errorf("global concurrent request limit exceeded for getBlob operations, try again later")
+		return fmt.Errorf("global concurrent request limit %d exceeded for getBlob operations, try again later",
+			l.config.MaxConcurrentGetBlobOps)
 	}
 	if l.opLimiter.TokensAt(now) < 1 {
-		return fmt.Errorf("global rate limit exceeded for getBlob operations, try again later")
+		return fmt.Errorf("global rate limit %0.1fHz exceeded for getBlob operations, try again later",
+			l.config.MaxGetBlobOpsPerSecond)
 	}
 
 	l.operationsInFlight++
@@ -96,7 +98,8 @@ func (l *BlobRateLimiter) RequestGetBlobBandwidth(now time.Time, bytes uint32) e
 
 	allowed := l.bandwidthLimiter.AllowN(now, int(bytes))
 	if !allowed {
-		return fmt.Errorf("global rate limit exceeded for getBlob bandwidth, try again later")
+		return fmt.Errorf("global rate limit %dMiB/s exceeded for getBlob bandwidth, try again later",
+			int(l.config.MaxGetBlobBytesPerSecond/1024/1024))
 	}
 	return nil
 }
diff --git a/relay/limiter/chunk_rate_limiter.go b/relay/limiter/chunk_rate_limiter.go
index 0db62648cd..061484a0c7 100644
--- a/relay/limiter/chunk_rate_limiter.go
+++ b/relay/limiter/chunk_rate_limiter.go
@@ -94,16 +94,21 @@ func (l *ChunkRateLimiter) BeginGetChunkOperation(
 	}
 
 	if l.globalOperationsInFlight >= l.config.MaxConcurrentGetChunkOps {
-		return fmt.Errorf("global concurrent request limit exceeded for GetChunks operations, try again later")
+		return fmt.Errorf(
+			"global concurrent request limit %d exceeded for GetChunks operations, try again later",
+			l.config.MaxConcurrentGetChunkOps)
 	}
 
 	if l.globalOpLimiter.TokensAt(now) < 1 {
-		return fmt.Errorf("global rate limit exceeded for GetChunks operations, try again later")
+		return fmt.Errorf("global rate limit %0.1fHz exceeded for GetChunks operations, try again later",
+			l.config.MaxGetChunkOpsPerSecond)
 	}
 
 	if l.perClientOperationsInFlight[requesterID] >= l.config.MaxConcurrentGetChunkOpsClient {
-		return fmt.Errorf("client concurrent request limit exceeded for GetChunks")
+		return fmt.Errorf("client concurrent request limit %d exceeded for GetChunks",
+			l.config.MaxConcurrentGetChunkOpsClient)
 	}
 
 	if l.perClientOpLimiter[requesterID].TokensAt(now) < 1 {
-		return fmt.Errorf("client rate limit exceeded for GetChunks, try again later")
+		return fmt.Errorf("client rate limit %0.1fHz exceeded for GetChunks, try again later",
+			l.config.MaxGetChunkOpsPerSecondClient)
 	}
 
 	l.globalOperationsInFlight++
@@ -138,13 +143,19 @@ func (l *ChunkRateLimiter) RequestGetChunkBandwidth(now time.Time, requesterID s
 
 	allowed := l.globalBandwidthLimiter.AllowN(now, bytes)
 	if !allowed {
-		return fmt.Errorf("global rate limit exceeded for GetChunk bandwidth, try again later")
+		return fmt.Errorf("global rate limit %dMiB/s exceeded for GetChunk bandwidth, try again later",
+			int(l.config.MaxGetChunkBytesPerSecond/1024/1024))
 	}
 
-	allowed = l.perClientBandwidthLimiter[requesterID].AllowN(now, bytes)
+	limiter, ok := l.perClientBandwidthLimiter[requesterID]
+	if !ok {
+		return fmt.Errorf("internal error, unable to find bandwidth limiter for client ID %s", requesterID)
+	}
+	allowed = limiter.AllowN(now, bytes)
 	if !allowed {
 		l.globalBandwidthLimiter.AllowN(now, -bytes)
-		return fmt.Errorf("client rate limit exceeded for GetChunk bandwidth, try again later")
+		return fmt.Errorf("client rate limit %dMiB/s exceeded for GetChunk bandwidth, try again later",
+			int(l.config.MaxGetChunkBytesPerSecondClient/1024/1024))
 	}
 
 	return nil
diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go
index 3e32924072..c916d287f2 100644
--- a/relay/metadata_provider.go
+++ b/relay/metadata_provider.go
@@ -157,7 +157,7 @@ func (m *metadataProvider) fetchMetadata(key v2.BlobKey) (*blobMetadata, error)
 	}
 
 	// TODO(cody-littley): blob size is not correct https://github.com/Layr-Labs/eigenda/pull/906#discussion_r1847396530
-	blobSize := uint32(cert.BlobHeader.BlobCommitments.Length)
+	blobSize := uint32(cert.BlobHeader.BlobCommitments.Length) * encoding.BYTES_PER_SYMBOL
 	chunkSize, err := v2.GetChunkLength(cert.BlobHeader.BlobVersion, blobSize)
 	chunkSize *= encoding.BYTES_PER_SYMBOL
 	if err != nil {

From 5f853e0c7740f69fd108e1e6b2eb172ba0079417 Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Tue, 19 Nov 2024 15:06:23 -0600
Subject: [PATCH 24/45] Revert unintentional changes.

Signed-off-by: Cody Littley
---
 core/data.go                            | 2 +-
 disperser/apiserver/disperse_blob_v2.go | 2 +-
 disperser/apiserver/server_v2_test.go   | 2 +-
 disperser/dataapi/docs/docs.go          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/data.go b/core/data.go
index f3c8a154b9..61f6be0e77 100644
--- a/core/data.go
+++ b/core/data.go
@@ -30,7 +30,7 @@ type SecurityParam struct {
 	AdversaryThreshold uint8
 	// ConfirmationThreshold is the amount of stake that must sign a message for it to be considered valid as a percentage of the total stake in the quorum
 	ConfirmationThreshold uint8
-	// Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup auth. This is used
+	// Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup authentication. This is used
 	// for restricting the rate at which retrievers are able to download data from the DA node to a multiple of the rate at which the
 	// data was posted to the DA node.
 	QuorumRate common.RateParam
diff --git a/disperser/apiserver/disperse_blob_v2.go b/disperser/apiserver/disperse_blob_v2.go
index 10e3ca5a9d..d39d75f2cd 100644
--- a/disperser/apiserver/disperse_blob_v2.go
+++ b/disperser/apiserver/disperse_blob_v2.go
@@ -110,7 +110,7 @@ func (s *DispersalServerV2) validateDispersalRequest(req *pb.DisperseBlobRequest
 		return api.NewErrorInvalidArg(fmt.Sprintf("invalid blob header: %s", err.Error()))
 	}
 	if err = s.authenticator.AuthenticateBlobRequest(blobHeader); err != nil {
-		return api.NewErrorInvalidArg(fmt.Sprintf("auth failed: %s", err.Error()))
+		return api.NewErrorInvalidArg(fmt.Sprintf("authentication failed: %s", err.Error()))
 	}
 
 	// TODO(ian-shim): validate commitment, length is power of 2 and less than maxNumSymbolsPerBlob, payment metadata
diff --git a/disperser/apiserver/server_v2_test.go b/disperser/apiserver/server_v2_test.go
index 495e40acd8..0c2697366f 100644
--- a/disperser/apiserver/server_v2_test.go
+++ b/disperser/apiserver/server_v2_test.go
@@ -200,7 +200,7 @@ func TestV2DisperseBlobRequestValidation(t *testing.T) {
 		Data:       data,
 		BlobHeader: invalidReqProto,
 	})
-	assert.ErrorContains(t, err, "auth failed")
+	assert.ErrorContains(t, err, "authentication failed")
 }
 
 func TestV2GetBlobStatus(t *testing.T) {
diff --git a/disperser/dataapi/docs/docs.go b/disperser/dataapi/docs/docs.go
index 9cf7dff3b1..6c8a7c4874 100644
--- a/disperser/dataapi/docs/docs.go
+++ b/disperser/dataapi/docs/docs.go
@@ -765,7 +765,7 @@ const docTemplate = `{
                     "type": "integer"
                 },
                 "quorumRate": {
-                    "description": "Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup auth. This is used\nfor restricting the rate at which retrievers are able to download data from the DA node to a multiple of the rate at which the\ndata was posted to the DA node.",
+                    "description": "Rate Limit. This is a temporary measure until the node can derive rates on its own using rollup authentication. This is used\nfor restricting the rate at which retrievers are able to download data from the DA node to a multiple of the rate at which the\ndata was posted to the DA node.",
                     "type": "integer"
                 }
             }

From f22544c855c2735948f400187f36baca6067cc6e Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Wed, 20 Nov 2024 08:31:15 -0600
Subject: [PATCH 25/45] Fix bug.

Signed-off-by: Cody Littley
---
 relay/auth/authenticator.go      |  5 -----
 relay/auth/authenticator_test.go | 11 -----------
 relay/server.go                  | 10 ++++++----
 3 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go
index ee6f67e8b2..aa804e842d 100644
--- a/relay/auth/authenticator.go
+++ b/relay/auth/authenticator.go
@@ -65,11 +65,6 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest(
 	request *pb.GetChunksRequest,
 	now time.Time) error {
 
-	if a == nil {
-		// do not enforce auth if the authenticator is nil
-		return nil
-	}
-
 	if a.isAuthenticationStillValid(now, address) {
 		// We've recently authenticated this client. Do not authenticate again for a while.
return nil diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go index 218ca8d5d9..7817041369 100644 --- a/relay/auth/authenticator_test.go +++ b/relay/auth/authenticator_test.go @@ -218,14 +218,3 @@ func TestBadSignature(t *testing.T) { now) require.Error(t, err) } - -func TestNilAuthenticator(t *testing.T) { - var authenticator *requestAuthenticator = nil - - request := randomGetChunksRequest() - err := authenticator.AuthenticateGetChunksRequest( - "foobar", - request, - time.Now()) - require.NoError(t, err) -} diff --git a/relay/server.go b/relay/server.go index 703a4b17ca..4bc61ca8ad 100644 --- a/relay/server.go +++ b/relay/server.go @@ -225,13 +225,15 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* } clientAddress := client.Addr.String() - err := s.authenticator.AuthenticateGetChunksRequest(clientAddress, request, time.Now()) - if err != nil { - return nil, fmt.Errorf("auth failed: %w", err) + if s.authenticator != nil { + err := s.authenticator.AuthenticateGetChunksRequest(clientAddress, request, time.Now()) + if err != nil { + return nil, fmt.Errorf("auth failed: %w", err) + } } clientID := string(request.RequesterId) - err = s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID) + err := s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID) if err != nil { return nil, err From 7a26c27eec58d63249440d1bd230be07597727eb Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 08:42:08 -0600 Subject: [PATCH 26/45] Made requested changes. Signed-off-by: Cody Littley --- api/grpc/relay/relay.pb.go | 104 +++++++++++++++-------------- api/proto/relay/relay.proto | 12 ++-- relay/auth/authenticator.go | 6 +- relay/auth/authenticator_test.go | 22 +++--- relay/auth/request_signing.go | 4 +- relay/auth/request_signing_test.go | 6 +- relay/server.go | 2 +- 7 files changed, 80 insertions(+), 76 deletions(-) diff --git a/api/grpc/relay/relay.pb.go b/api/grpc/relay/relay.pb.go index 14fdac87ca..625b6dc8f5 100644 --- a/api/grpc/relay/relay.pb.go +++ b/api/grpc/relay/relay.pb.go @@ -126,12 +126,14 @@ type GetChunksRequest struct { // The chunk requests. Chunks are returned in the same order as they are requested. ChunkRequests []*ChunkRequest `protobuf:"bytes,1,rep,name=chunk_requests,json=chunkRequests,proto3" json:"chunk_requests,omitempty"` - // If this is an authenticated request, this should hold the ID of the requester. If this - // is an unauthenticated request, this field should be empty. - RequesterId []byte `protobuf:"bytes,2,opt,name=requester_id,json=requesterId,proto3" json:"requester_id,omitempty"` + // If this is an authenticated request, this should hold the ID of the operator. If this + // is an unauthenticated request, this field should be empty. Relays may choose to reject + // unauthenticated requests. + OperatorId []byte `protobuf:"bytes,2,opt,name=operator_id,json=operatorId,proto3" json:"operator_id,omitempty"` // If this is an authenticated request, this field will hold a signature by the requester - // on the chunks being requested. - RequesterSignature []byte `protobuf:"bytes,3,opt,name=requester_signature,json=requesterSignature,proto3" json:"requester_signature,omitempty"` + // on the hash of this request. Signature should be computed with relay.auth.SignGetChunksRequest() + // or an equivalent implementation. Relays may choose to reject unauthenticated requests. 
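For the signature field added below, a client would build the request, set the operator ID, and call the SignGetChunksRequest helper from this series to hash and sign it. A usage sketch; the helper and message types come from this patch series, while signedByRangeRequest, its arguments, and the chunk range are illustrative:

    package example

    import (
        pb "github.com/Layr-Labs/eigenda/api/grpc/relay"
        "github.com/Layr-Labs/eigenda/core"
        "github.com/Layr-Labs/eigenda/relay/auth"
    )

    // signedByRangeRequest builds a request for chunks [0, 8) of blobKey and
    // signs it with the operator's BLS key pair.
    func signedByRangeRequest(keyPair *core.KeyPair, operatorID core.OperatorID, blobKey []byte) *pb.GetChunksRequest {
        request := &pb.GetChunksRequest{
            OperatorId: operatorID[:],
            ChunkRequests: []*pb.ChunkRequest{
                {
                    Request: &pb.ChunkRequest_ByRange{
                        ByRange: &pb.ChunkRequestByRange{
                            BlobKey:    blobKey,
                            StartIndex: 0,
                            EndIndex:   8,
                        },
                    },
                },
            },
        }

        // Hashes the request per the documented schema and fills in OperatorSignature.
        auth.SignGetChunksRequest(keyPair, request)
        return request
    }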
+ OperatorSignature []byte `protobuf:"bytes,3,opt,name=operator_signature,json=operatorSignature,proto3" json:"operator_signature,omitempty"` } func (x *GetChunksRequest) Reset() { @@ -173,16 +175,16 @@ func (x *GetChunksRequest) GetChunkRequests() []*ChunkRequest { return nil } -func (x *GetChunksRequest) GetRequesterId() []byte { +func (x *GetChunksRequest) GetOperatorId() []byte { if x != nil { - return x.RequesterId + return x.OperatorId } return nil } -func (x *GetChunksRequest) GetRequesterSignature() []byte { +func (x *GetChunksRequest) GetOperatorSignature() []byte { if x != nil { - return x.RequesterSignature + return x.OperatorSignature } return nil } @@ -456,52 +458,52 @@ var file_relay_relay_proto_rawDesc = []byte{ 0x6c, 0x6f, 0x62, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x62, 0x4b, 0x65, 0x79, 0x22, 0x22, 0x0a, 0x0c, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6c, 0x6f, 0x62, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x62, 0x6c, 0x6f, 0x62, 0x22, 0xa1, 0x01, 0x0a, 0x10, 0x47, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x62, 0x6c, 0x6f, 0x62, 0x22, 0x9d, 0x01, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x39, 0x0a, 0x0e, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x5f, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x0d, 0x63, 0x68, 0x75, - 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, - 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x65, 0x72, 0x49, 0x64, 0x12, 0x2f, 0x0a, - 0x13, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x73, 0x69, 0x67, 0x6e, 0x61, - 0x74, 0x75, 0x72, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x12, 0x72, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x65, 0x72, 0x53, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x22, 0x55, - 0x0a, 0x13, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x42, 0x79, - 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x62, 0x4b, 0x65, 0x79, - 0x12, 0x23, 0x0a, 0x0d, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, - 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x49, 0x6e, - 0x64, 0x69, 0x63, 0x65, 0x73, 0x22, 0x6e, 0x0a, 0x13, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x42, 0x79, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x19, 0x0a, 0x08, - 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, - 0x62, 0x6c, 0x6f, 0x62, 0x4b, 0x65, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x74, 0x61, 0x72, 0x74, - 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x73, 0x74, - 0x61, 0x72, 0x74, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x65, 0x6e, 0x64, 0x5f, - 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x08, 0x65, 0x6e, 0x64, - 0x49, 0x6e, 0x64, 0x65, 0x78, 0x22, 0x89, 0x01, 0x0a, 0x0c, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x62, 0x79, 0x5f, 0x69, 
0x6e, 0x64, - 0x65, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, - 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x42, 0x79, 0x49, 0x6e, - 0x64, 0x65, 0x78, 0x48, 0x00, 0x52, 0x07, 0x62, 0x79, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x36, - 0x0a, 0x08, 0x62, 0x79, 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x19, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x42, 0x79, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x48, 0x00, 0x52, 0x07, 0x62, - 0x79, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x42, 0x09, 0x0a, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x22, 0x24, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x52, 0x65, - 0x70, 0x6c, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x32, 0x7b, 0x0a, 0x05, 0x52, 0x65, 0x6c, 0x61, 0x79, - 0x12, 0x35, 0x0a, 0x07, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x12, 0x14, 0x2e, 0x6e, 0x6f, - 0x64, 0x65, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x12, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x62, - 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x09, 0x47, 0x65, 0x74, 0x43, 0x68, - 0x75, 0x6e, 0x6b, 0x73, 0x12, 0x16, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x47, 0x65, 0x74, 0x43, - 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e, 0x6e, - 0x6f, 0x64, 0x65, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x70, - 0x6c, 0x79, 0x22, 0x00, 0x42, 0x2d, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, - 0x6f, 0x6d, 0x2f, 0x4c, 0x61, 0x79, 0x72, 0x2d, 0x4c, 0x61, 0x62, 0x73, 0x2f, 0x65, 0x69, 0x67, - 0x65, 0x6e, 0x64, 0x61, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x72, 0x65, - 0x6c, 0x61, 0x79, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x6f, 0x70, + 0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, + 0x0a, 0x6f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x49, 0x64, 0x12, 0x2d, 0x0a, 0x12, 0x6f, + 0x70, 0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x11, 0x6f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x6f, + 0x72, 0x53, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x22, 0x55, 0x0a, 0x13, 0x43, 0x68, + 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x42, 0x79, 0x49, 0x6e, 0x64, 0x65, + 0x78, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0c, 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x62, 0x4b, 0x65, 0x79, 0x12, 0x23, 0x0a, 0x0d, + 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x49, 0x6e, 0x64, 0x69, 0x63, 0x65, + 0x73, 0x22, 0x6e, 0x0a, 0x13, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x42, 0x79, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x6c, 0x6f, 0x62, + 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x62, + 0x4b, 0x65, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 
0x0d, 0x52, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x49, + 0x6e, 0x64, 0x65, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x65, 0x6e, 0x64, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x49, 0x6e, 0x64, 0x65, + 0x78, 0x22, 0x89, 0x01, 0x0a, 0x0c, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x62, 0x79, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x43, 0x68, 0x75, 0x6e, + 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x42, 0x79, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x48, + 0x00, 0x52, 0x07, 0x62, 0x79, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x36, 0x0a, 0x08, 0x62, 0x79, + 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6e, + 0x6f, 0x64, 0x65, 0x2e, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x42, 0x79, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x48, 0x00, 0x52, 0x07, 0x62, 0x79, 0x52, 0x61, 0x6e, + 0x67, 0x65, 0x42, 0x09, 0x0a, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x24, 0x0a, + 0x0e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, + 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x04, 0x64, + 0x61, 0x74, 0x61, 0x32, 0x7b, 0x0a, 0x05, 0x52, 0x65, 0x6c, 0x61, 0x79, 0x12, 0x35, 0x0a, 0x07, + 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x12, 0x14, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x47, + 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x12, 0x2e, + 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x70, 0x6c, + 0x79, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x09, 0x47, 0x65, 0x74, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x73, + 0x12, 0x16, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x75, 0x6e, 0x6b, + 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, + 0x47, 0x65, 0x74, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, + 0x42, 0x2d, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x4c, + 0x61, 0x79, 0x72, 0x2d, 0x4c, 0x61, 0x62, 0x73, 0x2f, 0x65, 0x69, 0x67, 0x65, 0x6e, 0x64, 0x61, + 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x72, 0x65, 0x6c, 0x61, 0x79, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/api/proto/relay/relay.proto b/api/proto/relay/relay.proto index 82e405dbfe..ebe740622d 100644 --- a/api/proto/relay/relay.proto +++ b/api/proto/relay/relay.proto @@ -32,13 +32,15 @@ message GetChunksRequest { // The chunk requests. Chunks are returned in the same order as they are requested. repeated ChunkRequest chunk_requests = 1; - // If this is an authenticated request, this should hold the ID of the requester. If this - // is an unauthenticated request, this field should be empty. - bytes requester_id = 2; + // If this is an authenticated request, this should hold the ID of the operator. If this + // is an unauthenticated request, this field should be empty. Relays may choose to reject + // unauthenticated requests. + bytes operator_id = 2; // If this is an authenticated request, this field will hold a signature by the requester - // on the chunks being requested. - bytes requester_signature = 3; + // on the hash of this request. Signature should be computed with relay.auth.SignGetChunksRequest() + // or an equivalent implementation. 
Relays may choose to reject unauthenticated requests. + bytes operator_signature = 3; } // A request for chunks within a specific blob. Each chunk is requested individually by its index. diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go index aa804e842d..65ccdb7227 100644 --- a/relay/auth/authenticator.go +++ b/relay/auth/authenticator.go @@ -79,14 +79,14 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( return fmt.Errorf("failed to get operators: %w", err) } - operatorID := core.OperatorID(request.RequesterId) + operatorID := core.OperatorID(request.OperatorId) operator, ok := operators[operatorID] if !ok { - return errors.New("operator not found") + return fmt.Errorf("operator not found (block %d)", blockNumber) } key := operator.PubkeyG2 - g1Point, err := (&core.G1Point{}).Deserialize(request.RequesterSignature) + g1Point, err := (&core.G1Point{}).Deserialize(request.OperatorSignature) if err != nil { return fmt.Errorf("failed to deserialize signature: %w", err) } diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go index 7817041369..2b71910af9 100644 --- a/relay/auth/authenticator_test.go +++ b/relay/auth/authenticator_test.go @@ -60,9 +60,9 @@ func TestValidRequest(t *testing.T) { authenticator := NewRequestAuthenticator(ics, timeout) request := randomGetChunksRequest() - request.RequesterId = operatorID[:] + request.OperatorId = operatorID[:] SignGetChunksRequest(ics.KeyPairs[operatorID], request) - signature := request.RequesterSignature + signature := request.OperatorSignature now := time.Now() @@ -76,8 +76,8 @@ func TestValidRequest(t *testing.T) { // Making additional requests before timeout elapses should not trigger authentication for the address "foobar". // To probe at this, intentionally make a request that would be considered invalid if it were authenticated. invalidRequest := randomGetChunksRequest() - invalidRequest.RequesterId = operatorID[:] - invalidRequest.RequesterSignature = signature // the previous signature is invalid here + invalidRequest.OperatorId = operatorID[:] + invalidRequest.OperatorSignature = signature // the previous signature is invalid here start := now for now.Before(start.Add(timeout)) { @@ -122,9 +122,9 @@ func TestAuthenticationSavingDisabled(t *testing.T) { authenticator := NewRequestAuthenticator(ics, timeout) request := randomGetChunksRequest() - request.RequesterId = operatorID[:] + request.OperatorId = operatorID[:] SignGetChunksRequest(ics.KeyPairs[operatorID], request) - signature := request.RequesterSignature + signature := request.OperatorSignature now := time.Now() @@ -138,8 +138,8 @@ func TestAuthenticationSavingDisabled(t *testing.T) { // There is no authentication timeout, so a new request should trigger authentication. // To probe at this, intentionally make a request that would be considered invalid if it were authenticated. 
invalidRequest := randomGetChunksRequest() - invalidRequest.RequesterId = operatorID[:] - invalidRequest.RequesterSignature = signature // the previous signature is invalid here + invalidRequest.OperatorId = operatorID[:] + invalidRequest.OperatorSignature = signature // the previous signature is invalid here err = authenticator.AuthenticateGetChunksRequest( "foobar", @@ -167,7 +167,7 @@ func TestNonExistingClient(t *testing.T) { invalidOperatorID := tu.RandomBytes(32) request := randomGetChunksRequest() - request.RequesterId = invalidOperatorID + request.OperatorId = invalidOperatorID ics.Mock.On("GetCurrentBlockNumber").Return(uint(0), nil) err = authenticator.AuthenticateGetChunksRequest( @@ -194,7 +194,7 @@ func TestBadSignature(t *testing.T) { authenticator := NewRequestAuthenticator(ics, timeout) request := randomGetChunksRequest() - request.RequesterId = operatorID[:] + request.OperatorId = operatorID[:] SignGetChunksRequest(ics.KeyPairs[operatorID], request) now := time.Now() @@ -210,7 +210,7 @@ func TestBadSignature(t *testing.T) { now = now.Add(timeout) // Change a byte in the signature to make it invalid - request.RequesterSignature[0] = request.RequesterSignature[0] ^ 1 + request.OperatorSignature[0] = request.OperatorSignature[0] ^ 1 err = authenticator.AuthenticateGetChunksRequest( "foobar", diff --git a/relay/auth/request_signing.go b/relay/auth/request_signing.go index 8170951f48..fa0adf4e2e 100644 --- a/relay/auth/request_signing.go +++ b/relay/auth/request_signing.go @@ -15,7 +15,7 @@ func HashGetChunksRequest(request *pb.GetChunksRequest) []byte { hasher := sha3.NewLegacyKeccak256() - hasher.Write(request.GetRequesterId()) + hasher.Write(request.GetOperatorId()) for _, chunkRequest := range request.GetChunkRequests() { if chunkRequest.GetByIndex() != nil { getByIndex := chunkRequest.GetByIndex() @@ -46,5 +46,5 @@ func HashGetChunksRequest(request *pb.GetChunksRequest) []byte { func SignGetChunksRequest(keys *core.KeyPair, request *pb.GetChunksRequest) { hash := HashGetChunksRequest(request) signature := keys.SignMessage(([32]byte)(hash)) - request.RequesterSignature = signature.G1Point.Serialize() + request.OperatorSignature = signature.G1Point.Serialize() } diff --git a/relay/auth/request_signing_test.go b/relay/auth/request_signing_test.go index ded7180a1a..3c05188514 100644 --- a/relay/auth/request_signing_test.go +++ b/relay/auth/request_signing_test.go @@ -39,7 +39,7 @@ func randomGetChunksRequest() *pb.GetChunksRequest { } } return &pb.GetChunksRequest{ - RequesterId: tu.RandomBytes(32), + OperatorId: tu.RandomBytes(32), ChunkRequests: requestedChunks, } } @@ -60,12 +60,12 @@ func TestHashGetChunksRequest(t *testing.T) { require.NotEqual(t, hashA, hashB) // Adding a signature should not affect the hash - requestA.RequesterSignature = tu.RandomBytes(32) + requestA.OperatorSignature = tu.RandomBytes(32) hashAA = HashGetChunksRequest(requestA) require.Equal(t, hashA, hashAA) // Changing the requester ID should change the hash - requestA.RequesterId = tu.RandomBytes(32) + requestA.OperatorId = tu.RandomBytes(32) hashAA = HashGetChunksRequest(requestA) require.NotEqual(t, hashA, hashAA) } diff --git a/relay/server.go b/relay/server.go index 4bc61ca8ad..f029da731b 100644 --- a/relay/server.go +++ b/relay/server.go @@ -232,7 +232,7 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* } } - clientID := string(request.RequesterId) + clientID := string(request.OperatorId) err := s.chunkRateLimiter.BeginGetChunkOperation(time.Now(), clientID) if 
err != nil {

From d21e0bc391e5239a301d0a71f54497c84dd2c36a Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Wed, 20 Nov 2024 08:52:40 -0600
Subject: [PATCH 27/45] Update proto documentation.

Signed-off-by: Cody Littley
---
 api/grpc/relay/relay.pb.go  | 21 ++++++++++++++++++---
 api/proto/relay/relay.proto | 21 ++++++++++++++++++---
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/api/grpc/relay/relay.pb.go b/api/grpc/relay/relay.pb.go
index 625b6dc8f5..91a69b0c6b 100644
--- a/api/grpc/relay/relay.pb.go
+++ b/api/grpc/relay/relay.pb.go
@@ -130,9 +130,24 @@ type GetChunksRequest struct {
 	// is an unauthenticated request, this field should be empty. Relays may choose to reject
 	// unauthenticated requests.
 	OperatorId []byte `protobuf:"bytes,2,opt,name=operator_id,json=operatorId,proto3" json:"operator_id,omitempty"`
-	// If this is an authenticated request, this field will hold a signature by the requester
-	// on the hash of this request. Signature should be computed with relay.auth.SignGetChunksRequest()
-	// or an equivalent implementation. Relays may choose to reject unauthenticated requests.
+	// If this is an authenticated request, this field will hold a BLS signature by the requester
+	// on the hash of this request. Relays may choose to reject unauthenticated requests.
+	//
+	// The following describes the schema for computing the hash of this request.
+	// This algorithm is implemented in Go by relay.auth.HashGetChunksRequest().
+	//
+	// All integers are encoded as unsigned 4 byte big endian values.
+	//
+	// Perform a keccak256 hash on the following data in the following order:
+	//  1. the operator id
+	//  2. for each chunk request:
+	//     a. if the chunk request is a request by index:
+	//        i. the blob key
+	//        ii. each requested chunk index, in order
+	//     b. if the chunk request is a request by range:
+	//        i. the blob key
+	//        ii. the start index
+	//        iii. the end index
 	OperatorSignature []byte `protobuf:"bytes,3,opt,name=operator_signature,json=operatorSignature,proto3" json:"operator_signature,omitempty"`
 }
 
diff --git a/api/proto/relay/relay.proto b/api/proto/relay/relay.proto
index ebe740622d..ff45dcfafc 100644
--- a/api/proto/relay/relay.proto
+++ b/api/proto/relay/relay.proto
@@ -37,9 +37,24 @@ message GetChunksRequest {
   // unauthenticated requests.
   bytes operator_id = 2;
 
-  // If this is an authenticated request, this field will hold a signature by the requester
-  // on the hash of this request. Signature should be computed with relay.auth.SignGetChunksRequest()
-  // or an equivalent implementation. Relays may choose to reject unauthenticated requests.
+  // If this is an authenticated request, this field will hold a BLS signature by the requester
+  // on the hash of this request. Relays may choose to reject unauthenticated requests.
+  //
+  // The following describes the schema for computing the hash of this request.
+  // This algorithm is implemented in Go by relay.auth.HashGetChunksRequest().
+  //
+  // All integers are encoded as unsigned 4 byte big endian values.
+  //
+  // Perform a keccak256 hash on the following data in the following order:
+  //  1. the operator id
+  //  2. for each chunk request:
+  //     a. if the chunk request is a request by index:
+  //        i. the blob key
+  //        ii. each requested chunk index, in order
+  //     b. if the chunk request is a request by range:
+  //        i. the blob key
+  //        ii. the start index
+  //        iii. the end index
   bytes operator_signature = 3;
 }
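The comment above allows "an equivalent implementation" of the request hash. The sketch below follows the documented schema (keccak256 over the operator ID, then per chunk request either the blob key plus each chunk index, or the blob key plus start and end indices, with integers as 4-byte big endian). It is illustrative, not a copy of the canonical relay.auth.HashGetChunksRequest:

    package auth

    import (
        "encoding/binary"

        pb "github.com/Layr-Labs/eigenda/api/grpc/relay"
        "golang.org/x/crypto/sha3"
    )

    // hashGetChunksRequestSketch is an illustrative equivalent of the
    // documented hash schema.
    func hashGetChunksRequestSketch(request *pb.GetChunksRequest) []byte {
        hasher := sha3.NewLegacyKeccak256()

        hasher.Write(request.GetOperatorId())
        for _, chunkRequest := range request.GetChunkRequests() {
            indexBytes := make([]byte, 4)
            if byIndex := chunkRequest.GetByIndex(); byIndex != nil {
                // A by-index request hashes the blob key, then each chunk index.
                hasher.Write(byIndex.GetBlobKey())
                for _, index := range byIndex.GetChunkIndices() {
                    binary.BigEndian.PutUint32(indexBytes, index)
                    hasher.Write(indexBytes)
                }
            } else if byRange := chunkRequest.GetByRange(); byRange != nil {
                // A by-range request hashes the blob key, then the start and end indices.
                hasher.Write(byRange.GetBlobKey())
                binary.BigEndian.PutUint32(indexBytes, byRange.GetStartIndex())
                hasher.Write(indexBytes)
                binary.BigEndian.PutUint32(indexBytes, byRange.GetEndIndex())
                hasher.Write(indexBytes)
            }
        }

        return hasher.Sum(nil)
    }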
each requested chunk index, in order bytes operator_signature = 3; } From 98a846942acb57d6bd2d49787b4d652aa5466e9f Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 09:07:29 -0600 Subject: [PATCH 28/45] Add key caching. Signed-off-by: Cody Littley --- relay/auth/authenticator.go | 47 +++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go index 65ccdb7227..457059a46b 100644 --- a/relay/auth/authenticator.go +++ b/relay/auth/authenticator.go @@ -45,6 +45,9 @@ type requestAuthenticator struct { // savedAuthLock is used for thread safe atomic modification of the authenticatedClients map and the // authenticationTimeouts queue. savedAuthLock sync.Mutex + + // keyCache is used to cache the public keys of operators. Operator keys are assumed to never change. + keyCache sync.Map } // NewRequestAuthenticator creates a new RequestAuthenticator. @@ -57,6 +60,7 @@ func NewRequestAuthenticator( authenticatedClients: make(map[string]struct{}), authenticationTimeouts: make([]*authenticationTimeout, 0), authenticationTimeoutDuration: authenticationTimeoutDuration, + keyCache: sync.Map{}, } } @@ -70,21 +74,7 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( return nil } - blockNumber, err := a.ics.GetCurrentBlockNumber() - if err != nil { - return fmt.Errorf("failed to get current block number: %w", err) - } - operators, err := a.ics.GetIndexedOperators(context.Background(), blockNumber) - if err != nil { - return fmt.Errorf("failed to get operators: %w", err) - } - - operatorID := core.OperatorID(request.OperatorId) - operator, ok := operators[operatorID] - if !ok { - return fmt.Errorf("operator not found (block %d)", blockNumber) - } - key := operator.PubkeyG2 + key, err := a.getOperatorKey(core.OperatorID(request.OperatorId)) g1Point, err := (&core.G1Point{}).Deserialize(request.OperatorSignature) if err != nil { @@ -106,6 +96,33 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( return nil } +// getOperatorKey returns the public key of the operator with the given ID, caching the result. +func (a *requestAuthenticator) getOperatorKey(operatorID core.OperatorID) (*core.G2Point, error) { + untypedKey, ok := a.keyCache.Load(operatorID) + if ok { + key := untypedKey.(*core.G2Point) + return key, nil + } + + blockNumber, err := a.ics.GetCurrentBlockNumber() + if err != nil { + return nil, fmt.Errorf("failed to get current block number: %w", err) + } + operators, err := a.ics.GetIndexedOperators(context.Background(), blockNumber) + if err != nil { + return nil, fmt.Errorf("failed to get operators: %w", err) + } + + operator, ok := operators[operatorID] + if !ok { + return nil, errors.New("operator not found") + } + key := operator.PubkeyG2 + + a.keyCache.Store(operatorID, key) + return key, nil +} + // saveAuthenticationResult saves the result of an auth. 
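The hash schema documented in patch 27 above is easier to follow in code. Below is a minimal standalone sketch, shown only to make the schema concrete; the canonical implementation is relay.auth.HashGetChunksRequest(). The GetByRange(), GetBlobKey(), GetStartIndex(), GetEndIndex(), and GetChunkIndices() accessors are assumed from the generated protobuf code and are not confirmed by this patch series.

package auth

import (
	"encoding/binary"

	pb "github.com/Layr-Labs/eigenda/api/grpc/relay"
	"golang.org/x/crypto/sha3"
)

// hashGetChunksRequestSketch follows the documented schema: keccak256 over the
// operator id, then over each chunk request in order, with all integers encoded
// as unsigned 4 byte big endian values. Accessor names beyond GetOperatorId(),
// GetChunkRequests() and GetByIndex() are assumptions, not confirmed API.
func hashGetChunksRequestSketch(request *pb.GetChunksRequest) []byte {
	hasher := sha3.NewLegacyKeccak256()
	buf := make([]byte, 4)

	hasher.Write(request.GetOperatorId())
	for _, chunkRequest := range request.GetChunkRequests() {
		if byRange := chunkRequest.GetByRange(); byRange != nil {
			// request by range: the blob key, the start index, the end index
			hasher.Write(byRange.GetBlobKey())
			binary.BigEndian.PutUint32(buf, byRange.GetStartIndex())
			hasher.Write(buf)
			binary.BigEndian.PutUint32(buf, byRange.GetEndIndex())
			hasher.Write(buf)
		} else if byIndex := chunkRequest.GetByIndex(); byIndex != nil {
			// request by index: the blob key, then each requested chunk index in order
			hasher.Write(byIndex.GetBlobKey())
			for _, chunkIndex := range byIndex.GetChunkIndices() {
				binary.BigEndian.PutUint32(buf, chunkIndex)
				hasher.Write(buf)
			}
		}
	}
	return hasher.Sum(nil)
}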
func (a *requestAuthenticator) saveAuthenticationResult(now time.Time, address string) { if a.authenticationTimeoutDuration == 0 { From 5948e73c418dc0532a6b4384cee3a15e1053e7fb Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 09:08:50 -0600 Subject: [PATCH 29/45] lint Signed-off-by: Cody Littley --- relay/auth/authenticator.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go index 457059a46b..f8c9bdbfc7 100644 --- a/relay/auth/authenticator.go +++ b/relay/auth/authenticator.go @@ -75,6 +75,9 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( } key, err := a.getOperatorKey(core.OperatorID(request.OperatorId)) + if err != nil { + return fmt.Errorf("failed to get operator key: %w", err) + } g1Point, err := (&core.G1Point{}).Deserialize(request.OperatorSignature) if err != nil { From 2f2209c99f95052c25e2df7aacbe800f75c6ace0 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 09:50:58 -0600 Subject: [PATCH 30/45] Added sane timeouts for relay. Signed-off-by: Cody Littley --- relay/blob_provider.go | 4 +-- relay/blob_provider_test.go | 6 ++-- relay/cache/cached_accessor.go | 54 +++++++++++++++++++++++++---- relay/cache/cached_accessor_test.go | 25 +++++++------ relay/chunk_provider.go | 2 +- relay/metadata_provider.go | 4 +-- relay/metadata_provider_test.go | 14 ++++---- relay/server.go | 24 ++++++++----- 8 files changed, 92 insertions(+), 41 deletions(-) diff --git a/relay/blob_provider.go b/relay/blob_provider.go index 44157f6069..303afe6bb3 100644 --- a/relay/blob_provider.go +++ b/relay/blob_provider.go @@ -46,9 +46,9 @@ func newBlobProvider( } // GetBlob retrieves a blob from the blob store. -func (s *blobProvider) GetBlob(blobKey v2.BlobKey) ([]byte, error) { +func (s *blobProvider) GetBlob(ctx context.Context, blobKey v2.BlobKey) ([]byte, error) { - data, err := s.blobCache.Get(blobKey) + data, err := s.blobCache.Get(ctx, blobKey) if err != nil { // It should not be possible for external users to force an error here since we won't diff --git a/relay/blob_provider_test.go b/relay/blob_provider_test.go index 6e996977bb..a12dd448ed 100644 --- a/relay/blob_provider_test.go +++ b/relay/blob_provider_test.go @@ -39,7 +39,7 @@ func TestReadWrite(t *testing.T) { // Read the blobs back. for key, data := range expectedData { - blob, err := server.GetBlob(key) + blob, err := server.GetBlob(context.Background(), key) require.NoError(t, err) require.Equal(t, data, blob) @@ -47,7 +47,7 @@ func TestReadWrite(t *testing.T) { // Read the blobs back again to test caching. for key, data := range expectedData { - blob, err := server.GetBlob(key) + blob, err := server.GetBlob(context.Background(), key) require.NoError(t, err) require.Equal(t, data, blob) @@ -69,7 +69,7 @@ func TestNonExistentBlob(t *testing.T) { require.NoError(t, err) for i := 0; i < 10; i++ { - blob, err := server.GetBlob(v2.BlobKey(tu.RandomBytes(32))) + blob, err := server.GetBlob(context.Background(), v2.BlobKey(tu.RandomBytes(32))) require.Error(t, err) require.Nil(t, blob) } diff --git a/relay/cache/cached_accessor.go b/relay/cache/cached_accessor.go index e39a3a3910..73ca575e4e 100644 --- a/relay/cache/cached_accessor.go +++ b/relay/cache/cached_accessor.go @@ -1,6 +1,7 @@ package cache import ( + "context" lru "github.com/hashicorp/golang-lru/v2" "sync" ) @@ -9,7 +10,9 @@ import ( // are expensive, and prevents multiple concurrent cache misses for the same key. 
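That miss-coalescing behavior is the heart of this file: when many goroutines miss on the same key at once, only one of them performs the expensive fetch and the rest wait for its result. Stripped of the LRU cache and of the context handling this patch adds, the idea reduces to the following self-contained sketch (all names are illustrative; none of this is relay API):

package main

import (
	"fmt"
	"sync"
)

// inFlightResult holds the outcome of one shared lookup.
type inFlightResult struct {
	wg    sync.WaitGroup
	value string
	err   error
}

// coalescer runs the underlying fetch at most once per concurrent burst of
// requests for the same key; later callers wait for the first caller's result.
type coalescer struct {
	lock     sync.Mutex
	inFlight map[string]*inFlightResult
	fetch    func(key string) (string, error)
}

func (c *coalescer) Get(key string) (string, error) {
	c.lock.Lock()
	if r, ok := c.inFlight[key]; ok {
		c.lock.Unlock()
		r.wg.Wait() // another goroutine is already fetching this key
		return r.value, r.err
	}
	r := &inFlightResult{}
	r.wg.Add(1)
	c.inFlight[key] = r
	c.lock.Unlock()

	r.value, r.err = c.fetch(key) // only one goroutine reaches this per key

	c.lock.Lock()
	delete(c.inFlight, key)
	c.lock.Unlock()
	r.wg.Done()
	return r.value, r.err
}

func main() {
	c := &coalescer{
		inFlight: make(map[string]*inFlightResult),
		fetch:    func(key string) (string, error) { return "value-for-" + key, nil },
	}
	fmt.Println(c.Get("blob"))
}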
type CachedAccessor[K comparable, V any] interface {
 	// Get returns the value for the given key. If the value is not in the cache, it will be fetched using the Accessor.
-	Get(key K) (V, error)
+	// If the context is cancelled, the function may abort early. If multiple goroutines request the same key,
+	// cancellation of one request will not affect the others.
+	Get(ctx context.Context, key K) (V, error)
 }
 
 // Accessor is a function capable of fetching a value from a resource. Used by CachedAccessor when there is a cache miss.
@@ -92,8 +95,7 @@ func newAccessResult[V any]() *accessResult[V] {
 	return result
 }
 
-func (c *cachedAccessor[K, V]) Get(key K) (V, error) {
-
+func (c *cachedAccessor[K, V]) Get(ctx context.Context, key K) (V, error) {
 	c.cacheLock.Lock()
 
 	// first, attempt to get the value from the cache
@@ -114,11 +116,42 @@ func (c *cachedAccessor[K, V]) Get(key K) (V, error) {
 
 	if alreadyLoading {
 		// The result is being fetched on another goroutine. Wait for it to finish.
-		result.wg.Wait()
-		return result.value, result.err
+		return c.waitForResult(ctx, result)
 	} else {
 		// We are the first goroutine to request this key.
+		return c.fetchResult(ctx, key, result)
+	}
+}
+
+// waitForResult waits for the result of a lookup that was initiated by another requester and returns it
+// when it becomes available. This method will return quickly if the provided context is cancelled.
+// Doing so does not disrupt the other requesters that are also waiting for this result.
+func (c *cachedAccessor[K, V]) waitForResult(ctx context.Context, result *accessResult[V]) (V, error) {
+	wgChan := make(chan struct{}, 1)
+	go func() {
+		// Wait inside this goroutine for select statement compatibility.
+		result.wg.Wait()
+		wgChan <- struct{}{}
+	}()
+
+	select {
+	case <-ctx.Done():
+		// The context was cancelled before the value was fetched, possibly due to a timeout.
+		var zeroValue V
+		return zeroValue, ctx.Err()
+	case <-wgChan:
+		return result.value, result.err
+	}
+}
+
+// fetchResult fetches the value for the given key and returns it. If the context is cancelled before the value
+// is fetched, the function will return early. If the fetch is successful, the value will be added to the cache.
+func (c *cachedAccessor[K, V]) fetchResult(ctx context.Context, key K, result *accessResult[V]) (V, error) {
+
+	// Perform the work in a background goroutine. This allows us to return early if the context is cancelled
+	// without disrupting the fetch operation that other requesters may be waiting for.
+	waitChan := make(chan struct{}, 1)
+	go func() {
 		if c.concurrencyLimiter != nil {
 			c.concurrencyLimiter <- struct{}{}
 		}
@@ -146,6 +179,15 @@ func (c *cachedAccessor[K, V]) Get(key K) (V, error) {
 
 		c.cacheLock.Unlock()
 
-		return value, err
+		waitChan <- struct{}{}
+	}()
+
+	select {
+	case <-ctx.Done():
+		// The context was cancelled before the value was fetched, possibly due to a timeout.
+ var zeroValue V + return zeroValue, ctx.Err() + case <-waitChan: + return result.value, result.err } } diff --git a/relay/cache/cached_accessor_test.go b/relay/cache/cached_accessor_test.go index ab37fa5a2e..a59b4835b3 100644 --- a/relay/cache/cached_accessor_test.go +++ b/relay/cache/cached_accessor_test.go @@ -1,6 +1,7 @@ package cache import ( + "context" "errors" tu "github.com/Layr-Labs/eigenda/common/testutils" "github.com/stretchr/testify/require" @@ -36,7 +37,7 @@ func TestRandomOperationsSingleThread(t *testing.T) { require.NoError(t, err) for i := 0; i < dataSize; i++ { - value, err := ca.Get(i) + value, err := ca.Get(context.Background(), i) if i%17 == 0 { require.Error(t, err) @@ -48,7 +49,7 @@ func TestRandomOperationsSingleThread(t *testing.T) { } for k, v := range baseData { - value, err := ca.Get(k) + value, err := ca.Get(context.Background(), k) if k%17 == 0 { require.Error(t, err) @@ -86,7 +87,7 @@ func TestCacheMisses(t *testing.T) { expectedCacheMissCount := uint64(0) for i := 0; i < cacheSize; i++ { expectedCacheMissCount++ - value, err := ca.Get(i) + value, err := ca.Get(context.Background(), i) require.NoError(t, err) require.Equal(t, baseData[i], *value) require.Equal(t, expectedCacheMissCount, cacheMissCount.Load()) @@ -94,7 +95,7 @@ func TestCacheMisses(t *testing.T) { // Get the first cacheSize keys again. This should not increase the cache miss count. for i := 0; i < cacheSize; i++ { - value, err := ca.Get(i) + value, err := ca.Get(context.Background(), i) require.NoError(t, err) require.Equal(t, baseData[i], *value) require.Equal(t, expectedCacheMissCount, cacheMissCount.Load()) @@ -102,14 +103,14 @@ func TestCacheMisses(t *testing.T) { // Read the last key. This should cause the first key to be evicted. expectedCacheMissCount++ - value, err := ca.Get(cacheSize) + value, err := ca.Get(context.Background(), cacheSize) require.NoError(t, err) require.Equal(t, baseData[cacheSize], *value) // Read the keys in order. Due to the order of evictions, each read should result in a cache miss. for i := 0; i < cacheSize; i++ { expectedCacheMissCount++ - value, err := ca.Get(i) + value, err := ca.Get(context.Background(), i) require.NoError(t, err) require.Equal(t, baseData[i], *value) require.Equal(t, expectedCacheMissCount, cacheMissCount.Load()) @@ -154,7 +155,7 @@ func ParallelAccessTest(t *testing.T, sleepEnabled bool) { for i := 0; i < 10; i++ { go func() { defer wg.Done() - value, err := ca.Get(0) + value, err := ca.Get(context.Background(), 0) require.NoError(t, err) require.Equal(t, baseData[0], *value) }() @@ -177,7 +178,7 @@ func ParallelAccessTest(t *testing.T, sleepEnabled bool) { require.Equal(t, uint64(1), cacheMissCount.Load()) // Fetching the key again should not result in a cache miss. - value, err := ca.Get(0) + value, err := ca.Get(context.Background(), 0) require.NoError(t, err) require.Equal(t, baseData[0], *value) require.Equal(t, uint64(1), cacheMissCount.Load()) @@ -223,7 +224,7 @@ func TestParallelAccessWithError(t *testing.T) { for i := 0; i < 10; i++ { go func() { defer wg.Done() - value, err := ca.Get(0) + value, err := ca.Get(context.Background(), 0) require.Nil(t, value) require.Equal(t, errors.New("intentional error"), err) }() @@ -246,7 +247,7 @@ func TestParallelAccessWithError(t *testing.T) { require.True(t, count >= 1) // Fetching the key again should result in another cache miss since the previous fetch failed. 
- value, err := ca.Get(0) + value, err := ca.Get(context.Background(), 0) require.Nil(t, value) require.Equal(t, errors.New("intentional error"), err) require.Equal(t, count+1, cacheMissCount.Load()) @@ -291,7 +292,7 @@ func TestConcurrencyLimiter(t *testing.T) { for i := 0; i < dataSize; i++ { boundI := i go func() { - value, err := ca.Get(boundI) + value, err := ca.Get(context.Background(), boundI) require.NoError(t, err) require.Equal(t, baseData[boundI], *value) wg.Done() @@ -310,3 +311,5 @@ func TestConcurrencyLimiter(t *testing.T) { accessorLock.Unlock() wg.Wait() } + +// TODO test what happens when the context is cancelled diff --git a/relay/chunk_provider.go b/relay/chunk_provider.go index 3fffb42a3b..577d88896b 100644 --- a/relay/chunk_provider.go +++ b/relay/chunk_provider.go @@ -89,7 +89,7 @@ func (s *chunkProvider) GetFrames(ctx context.Context, mMap metadataMap) (frameM boundKey := key go func() { - frames, err := s.frameCache.Get(*boundKey) + frames, err := s.frameCache.Get(ctx, *boundKey) if err != nil { s.logger.Errorf("Failed to get frames for blob %v: %v", boundKey.blobKey, err) completionChannel <- &framesResult{ diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go index c916d287f2..98e7bf9c89 100644 --- a/relay/metadata_provider.go +++ b/relay/metadata_provider.go @@ -79,7 +79,7 @@ func newMetadataProvider( type metadataMap map[v2.BlobKey]*blobMetadata // GetMetadataForBlobs retrieves metadata about multiple blobs in parallel. -func (m *metadataProvider) GetMetadataForBlobs(keys []v2.BlobKey) (metadataMap, error) { +func (m *metadataProvider) GetMetadataForBlobs(ctx context.Context, keys []v2.BlobKey) (metadataMap, error) { // blobMetadataResult is the result of a metadata fetch operation. type blobMetadataResult struct { @@ -102,7 +102,7 @@ func (m *metadataProvider) GetMetadataForBlobs(keys []v2.BlobKey) (metadataMap, boundKey := key go func() { - metadata, err := m.metadataCache.Get(boundKey) + metadata, err := m.metadataCache.Get(ctx, boundKey) if err != nil { // Intentionally log at debug level. External users can force this condition to trigger // by requesting metadata for a blob that does not exist, and so it's important to avoid diff --git a/relay/metadata_provider_test.go b/relay/metadata_provider_test.go index e5586d901b..c090b92939 100644 --- a/relay/metadata_provider_test.go +++ b/relay/metadata_provider_test.go @@ -27,7 +27,7 @@ func TestGetNonExistentBlob(t *testing.T) { // Try to fetch a non-existent blobs for i := 0; i < 10; i++ { - _, err := server.GetMetadataForBlobs([]v2.BlobKey{v2.BlobKey(tu.RandomBytes(32))}) + _, err := server.GetMetadataForBlobs(context.Background(), []v2.BlobKey{v2.BlobKey(tu.RandomBytes(32))}) require.Error(t, err) } } @@ -85,7 +85,7 @@ func TestFetchingIndividualMetadata(t *testing.T) { // Fetch the metadata from the server. for blobKey, totalChunkSizeBytes := range totalChunkSizeMap { - mMap, err := server.GetMetadataForBlobs([]v2.BlobKey{blobKey}) + mMap, err := server.GetMetadataForBlobs(context.Background(), []v2.BlobKey{blobKey}) require.NoError(t, err) require.Equal(t, 1, len(mMap)) metadata := mMap[blobKey] @@ -96,7 +96,7 @@ func TestFetchingIndividualMetadata(t *testing.T) { // Read it back again. This uses a different code pathway due to the cache. 
for blobKey, totalChunkSizeBytes := range totalChunkSizeMap {
-		mMap, err := server.GetMetadataForBlobs([]v2.BlobKey{blobKey})
+		mMap, err := server.GetMetadataForBlobs(context.Background(), []v2.BlobKey{blobKey})
 		require.NoError(t, err)
 		require.Equal(t, 1, len(mMap))
 		metadata := mMap[blobKey]
@@ -168,7 +168,7 @@ func TestBatchedFetch(t *testing.T) {
 			}
 		}
 
-		mMap, err := server.GetMetadataForBlobs(keys)
+		mMap, err := server.GetMetadataForBlobs(context.Background(), keys)
 		require.NoError(t, err)
 		assert.Equal(t, keyCount, len(mMap))
 
@@ -261,7 +261,7 @@ func TestIndividualFetchWithSharding(t *testing.T) {
 			}
 		}
 
-		mMap, err := server.GetMetadataForBlobs([]v2.BlobKey{blobKey})
+		mMap, err := server.GetMetadataForBlobs(context.Background(), []v2.BlobKey{blobKey})
 
 		if isBlobInCorrectShard {
 			// The blob is in the relay's shard, should be returned like normal
@@ -288,7 +288,7 @@ func TestIndividualFetchWithSharding(t *testing.T) {
 			}
 		}
 
-		mMap, err := server.GetMetadataForBlobs([]v2.BlobKey{blobKey})
+		mMap, err := server.GetMetadataForBlobs(context.Background(), []v2.BlobKey{blobKey})
 
 		if isBlobInCorrectShard {
 			// The blob is in the relay's shard, should be returned like normal
@@ -401,7 +401,7 @@ func TestBatchedFetchWithSharding(t *testing.T) {
 			}
 		}
 
-		mMap, err := server.GetMetadataForBlobs(keys)
+		mMap, err := server.GetMetadataForBlobs(context.Background(), keys)
 		if areKeysInCorrectShard {
 			require.NoError(t, err)
 			assert.Equal(t, keyCount, len(mMap))
diff --git a/relay/server.go b/relay/server.go
index f029da731b..550cd03fcd 100644
--- a/relay/server.go
+++ b/relay/server.go
@@ -100,6 +100,13 @@ type Config struct {
 
 	// AuthenticationDisabled will disable authentication if set to true.
 	AuthenticationDisabled bool
+
+	// TODO flagify
+	// The maximum time permitted for a GetChunks operation to complete. If zero then no timeout is enforced.
+	GetChunksTimeout time.Duration
+
+	// The maximum time permitted for a GetBlob operation to complete. If zero then no timeout is enforced.
+	GetBlobTimeout time.Duration
 }
 
 // NewServer creates a new relay Server.
@@ -162,9 +169,11 @@
 
 // GetBlob retrieves a blob stored by the relay.
 func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.GetBlobReply, error) {
-
-	// TODO(cody-littley):
-	//  - timeouts
+	if s.config.GetBlobTimeout > 0 {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(ctx, s.config.GetBlobTimeout)
+		defer cancel()
+	}
 
 	err := s.blobRateLimiter.BeginGetBlobOperation(time.Now())
 	if err != nil {
@@ -178,7 +187,7 @@ func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.G
 	}
 
 	keys := []v2.BlobKey{key}
-	mMap, err := s.metadataProvider.GetMetadataForBlobs(keys)
+	mMap, err := s.metadataProvider.GetMetadataForBlobs(ctx, keys)
 	if err != nil {
 		return nil, fmt.Errorf(
 			"error fetching metadata for blob, check if blob exists and is assigned to this relay: %w", err)
@@ -193,7 +202,7 @@
 		return nil, err
 	}
 
-	data, err := s.blobProvider.GetBlob(key)
+	data, err := s.blobProvider.GetBlob(ctx, key)
 	if err != nil {
 		return nil, fmt.Errorf("error fetching blob %s: %w", key.Hex(), err)
 	}
@@ -208,9 +217,6 @@
 
 // GetChunks retrieves chunks from blobs stored by the relay.
func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (*pb.GetChunksReply, error) { - // TODO(cody-littley): - // - timeouts - if len(request.ChunkRequests) <= 0 { return nil, fmt.Errorf("no chunk requests provided") } @@ -245,7 +251,7 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* return nil, err } - mMap, err := s.metadataProvider.GetMetadataForBlobs(keys) + mMap, err := s.metadataProvider.GetMetadataForBlobs(ctx, keys) if err != nil { return nil, fmt.Errorf( "error fetching metadata for blob, check if blob exists and is assigned to this relay: %w", err) From 4204ea0b1f55a10bca2128740f781814c8222344 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 10:32:13 -0600 Subject: [PATCH 31/45] Added moar timeouts. Signed-off-by: Cody Littley --- common/aws/cli.go | 10 ------- common/aws/s3/client.go | 6 ----- relay/blob_provider.go | 20 +++++++++----- relay/blob_provider_test.go | 17 ++++++++++-- relay/chunk_provider.go | 29 ++++++++++++++++----- relay/chunk_provider_test.go | 19 ++++++++++++-- relay/metadata_provider.go | 14 ++++++++-- relay/metadata_provider_test.go | 46 +++++++++++++++++++++++++++++---- relay/relay_test_utils.go | 28 +++++++++----------- relay/server.go | 22 ++++++++-------- relay/server_test.go | 9 +++++++ relay/timeout_config.go | 28 ++++++++++++++++++++ 12 files changed, 182 insertions(+), 66 deletions(-) create mode 100644 relay/timeout_config.go diff --git a/common/aws/cli.go b/common/aws/cli.go index d1b3bf274a..e646712175 100644 --- a/common/aws/cli.go +++ b/common/aws/cli.go @@ -37,12 +37,6 @@ type ClientConfig struct { // FragmentParallelismConstant helps determine the size of the pool of workers to help upload/download files. // A non-zero value for this parameter adds a constant number of workers. Default is 0. FragmentParallelismConstant int - // FragmentReadTimeout is used to bound the maximum time to wait for a single fragmented read. - // Default is 30 seconds. - FragmentReadTimeout time.Duration - // FragmentWriteTimeout is used to bound the maximum time to wait for a single fragmented write. - // Default is 30 seconds. 
- FragmentWriteTimeout time.Duration } func ClientFlags(envPrefix string, flagPrefix string) []cli.Flag { @@ -120,8 +114,6 @@ func ReadClientConfig(ctx *cli.Context, flagPrefix string) ClientConfig { EndpointURL: ctx.GlobalString(common.PrefixFlag(flagPrefix, EndpointURLFlagName)), FragmentParallelismFactor: ctx.GlobalInt(common.PrefixFlag(flagPrefix, FragmentParallelismFactorFlagName)), FragmentParallelismConstant: ctx.GlobalInt(common.PrefixFlag(flagPrefix, FragmentParallelismConstantFlagName)), - FragmentReadTimeout: ctx.GlobalDuration(common.PrefixFlag(flagPrefix, FragmentReadTimeoutFlagName)), - FragmentWriteTimeout: ctx.GlobalDuration(common.PrefixFlag(flagPrefix, FragmentWriteTimeoutFlagName)), } } @@ -131,7 +123,5 @@ func DefaultClientConfig() *ClientConfig { Region: "us-east-2", FragmentParallelismFactor: 8, FragmentParallelismConstant: 0, - FragmentReadTimeout: 30 * time.Second, - FragmentWriteTimeout: 30 * time.Second, } } diff --git a/common/aws/s3/client.go b/common/aws/s3/client.go index 3b773140c8..2a4cf8abed 100644 --- a/common/aws/s3/client.go +++ b/common/aws/s3/client.go @@ -223,9 +223,6 @@ func (s *client) FragmentedUploadObject( } resultChannel := make(chan error, len(fragments)) - ctx, cancel := context.WithTimeout(ctx, s.cfg.FragmentWriteTimeout) - defer cancel() - for _, fragment := range fragments { fragmentCapture := fragment s.concurrencyLimiter <- struct{}{} @@ -283,9 +280,6 @@ func (s *client) FragmentedDownloadObject( } resultChannel := make(chan *readResult, len(fragmentKeys)) - ctx, cancel := context.WithTimeout(ctx, s.cfg.FragmentWriteTimeout) - defer cancel() - for i, fragmentKey := range fragmentKeys { boundFragmentKey := fragmentKey boundI := i diff --git a/relay/blob_provider.go b/relay/blob_provider.go index 303afe6bb3..9b9863bfda 100644 --- a/relay/blob_provider.go +++ b/relay/blob_provider.go @@ -7,6 +7,7 @@ import ( "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore" "github.com/Layr-Labs/eigenda/relay/cache" "github.com/Layr-Labs/eigensdk-go/logging" + "time" ) // blobProvider encapsulates logic for fetching blobs. Utilized by the relay Server. @@ -20,6 +21,9 @@ type blobProvider struct { // blobCache is an LRU cache of blobs. blobCache cache.CachedAccessor[v2.BlobKey, []byte] + + // fetchTimeout is the maximum time to wait for a blob fetch operation to complete. + fetchTimeout time.Duration } // newBlobProvider creates a new blobProvider. @@ -28,12 +32,14 @@ func newBlobProvider( logger logging.Logger, blobStore *blobstore.BlobStore, blobCacheSize int, - maxIOConcurrency int) (*blobProvider, error) { + maxIOConcurrency int, + fetchTimeout time.Duration) (*blobProvider, error) { server := &blobProvider{ - ctx: ctx, - logger: logger, - blobStore: blobStore, + ctx: ctx, + logger: logger, + blobStore: blobStore, + fetchTimeout: fetchTimeout, } c, err := cache.NewCachedAccessor[v2.BlobKey, []byte](blobCacheSize, maxIOConcurrency, server.fetchBlob) @@ -47,7 +53,6 @@ func newBlobProvider( // GetBlob retrieves a blob from the blob store. func (s *blobProvider) GetBlob(ctx context.Context, blobKey v2.BlobKey) ([]byte, error) { - data, err := s.blobCache.Get(ctx, blobKey) if err != nil { @@ -62,7 +67,10 @@ func (s *blobProvider) GetBlob(ctx context.Context, blobKey v2.BlobKey) ([]byte, // fetchBlob retrieves a single blob from the blob store. 
func (s *blobProvider) fetchBlob(blobKey v2.BlobKey) ([]byte, error) {
-	data, err := s.blobStore.GetBlob(s.ctx, blobKey)
+	ctx, cancel := context.WithTimeout(s.ctx, s.fetchTimeout)
+	defer cancel()
+
+	data, err := s.blobStore.GetBlob(ctx, blobKey)
 	if err != nil {
 		s.logger.Errorf("Failed to fetch blob: %v", err)
 		return nil, err
diff --git a/relay/blob_provider_test.go b/relay/blob_provider_test.go
index a12dd448ed..9309461c65 100644
--- a/relay/blob_provider_test.go
+++ b/relay/blob_provider_test.go
@@ -7,6 +7,7 @@ import (
 	v2 "github.com/Layr-Labs/eigenda/core/v2"
 	"github.com/stretchr/testify/require"
 	"testing"
+	"time"
 )
 
 func TestReadWrite(t *testing.T) {
@@ -34,7 +35,13 @@ func TestReadWrite(t *testing.T) {
 		require.NoError(t, err)
 	}
 
-	server, err := newBlobProvider(context.Background(), logger, blobStore, 10, 32)
+	server, err := newBlobProvider(
+		context.Background(),
+		logger,
+		blobStore,
+		10,
+		32,
+		10*time.Second)
 	require.NoError(t, err)
 
 	// Read the blobs back.
@@ -65,7 +72,13 @@ func TestNonExistentBlob(t *testing.T) {
 
 	blobStore := buildBlobStore(t, logger)
 
-	server, err := newBlobProvider(context.Background(), logger, blobStore, 10, 32)
+	server, err := newBlobProvider(
+		context.Background(),
+		logger,
+		blobStore,
+		10,
+		32,
+		10*time.Second)
 	require.NoError(t, err)
 
 	for i := 0; i < 10; i++ {
diff --git a/relay/chunk_provider.go b/relay/chunk_provider.go
index 577d88896b..48ece7c3cd 100644
--- a/relay/chunk_provider.go
+++ b/relay/chunk_provider.go
@@ -11,6 +11,7 @@ import (
 	"github.com/Layr-Labs/eigenda/relay/chunkstore"
 	"github.com/Layr-Labs/eigensdk-go/logging"
 	"sync"
+	"time"
 )
 
 type chunkProvider struct {
@@ -23,6 +24,12 @@ type chunkProvider struct {
 
 	// chunkReader is used to read chunks from the chunk store.
 	chunkReader chunkstore.ChunkReader
+
+	// proofFetchTimeout is the maximum time to wait for a chunk proof fetch operation to complete.
+	proofFetchTimeout time.Duration
+
+	// coefficientFetchTimeout is the maximum time to wait for a chunk coefficient fetch operation to complete.
+	coefficientFetchTimeout time.Duration
 }
 
 // blobKeyWithMetadata attaches some additional metadata to a blobKey.
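The fetchBlob change at the top of this hunk is the template the rest of patch 31 repeats: each internal fetch derives its deadline from the provider's long-lived context (s.ctx) rather than from any single caller, so one caller timing out cannot tear down a cache fill that other callers are waiting on. The same shape in isolation, as a hedged sketch (the generic helper below is illustrative and not part of the relay codebase):

package main

import (
	"context"
	"fmt"
	"time"
)

// fetchWithTimeout bounds a single fetch operation without tying its lifetime
// to any individual caller's context. The parent argument plays the role of
// the provider's long-lived s.ctx.
func fetchWithTimeout[T any](
	parent context.Context,
	timeout time.Duration,
	fetch func(ctx context.Context) (T, error)) (T, error) {

	ctx, cancel := context.WithTimeout(parent, timeout)
	defer cancel()
	return fetch(ctx)
}

func main() {
	blob, err := fetchWithTimeout(context.Background(), 10*time.Second,
		func(ctx context.Context) ([]byte, error) {
			// stand-in for blobStore.GetBlob(ctx, blobKey)
			return []byte("blob data"), nil
		})
	fmt.Println(string(blob), err)
}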
@@ -41,12 +48,16 @@ func newChunkProvider( logger logging.Logger, chunkReader chunkstore.ChunkReader, cacheSize int, - maxIOConcurrency int) (*chunkProvider, error) { + maxIOConcurrency int, + proofFetchTimeout time.Duration, + coefficientFetchTimeout time.Duration) (*chunkProvider, error) { server := &chunkProvider{ - ctx: ctx, - logger: logger, - chunkReader: chunkReader, + ctx: ctx, + logger: logger, + chunkReader: chunkReader, + proofFetchTimeout: proofFetchTimeout, + coefficientFetchTimeout: coefficientFetchTimeout, } c, err := cache.NewCachedAccessor[blobKeyWithMetadata, []*encoding.Frame]( @@ -128,10 +139,13 @@ func (s *chunkProvider) fetchFrames(key blobKeyWithMetadata) ([]*encoding.Frame, var proofsErr error go func() { + ctx, cancel := context.WithTimeout(s.ctx, s.proofFetchTimeout) defer func() { wg.Done() + cancel() }() - proofs, proofsErr = s.chunkReader.GetChunkProofs(s.ctx, key.blobKey) + + proofs, proofsErr = s.chunkReader.GetChunkProofs(ctx, key.blobKey) }() fragmentInfo := &encoding.FragmentInfo{ @@ -139,7 +153,10 @@ func (s *chunkProvider) fetchFrames(key blobKeyWithMetadata) ([]*encoding.Frame, FragmentSizeBytes: key.metadata.fragmentSizeBytes, } - coefficients, err := s.chunkReader.GetChunkCoefficients(s.ctx, key.blobKey, fragmentInfo) + ctx, cancel := context.WithTimeout(s.ctx, s.coefficientFetchTimeout) + defer cancel() + + coefficients, err := s.chunkReader.GetChunkCoefficients(ctx, key.blobKey, fragmentInfo) if err != nil { return nil, err } diff --git a/relay/chunk_provider_test.go b/relay/chunk_provider_test.go index b768210d77..8615ad7d23 100644 --- a/relay/chunk_provider_test.go +++ b/relay/chunk_provider_test.go @@ -8,6 +8,7 @@ import ( "github.com/Layr-Labs/eigenda/encoding" "github.com/stretchr/testify/require" "testing" + "time" ) func TestFetchingIndividualBlobs(t *testing.T) { @@ -44,7 +45,14 @@ func TestFetchingIndividualBlobs(t *testing.T) { fragmentInfoMap[blobKey] = fragmentInfo } - server, err := newChunkProvider(context.Background(), logger, chunkReader, 10, 32) + server, err := newChunkProvider( + context.Background(), + logger, + chunkReader, + 10, + 32, + 10*time.Second, + 10*time.Second) require.NoError(t, err) // Read it back. @@ -124,7 +132,14 @@ func TestFetchingBatchedBlobs(t *testing.T) { fragmentInfoMap[blobKey] = fragmentInfo } - server, err := newChunkProvider(context.Background(), logger, chunkReader, 10, 32) + server, err := newChunkProvider( + context.Background(), + logger, + chunkReader, + 10, + 32, + 10*time.Second, + 10*time.Second) require.NoError(t, err) // Read it back. diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go index 98e7bf9c89..99d6c4b053 100644 --- a/relay/metadata_provider.go +++ b/relay/metadata_provider.go @@ -9,6 +9,7 @@ import ( "github.com/Layr-Labs/eigenda/relay/cache" "github.com/Layr-Labs/eigensdk-go/logging" "sync/atomic" + "time" ) // Metadata about a blob. The relay only needs a small subset of a blob's metadata. @@ -39,6 +40,9 @@ type metadataProvider struct { // relayIDSet is the set of relay IDs assigned to this relay. This relay will refuse to serve metadata for blobs // that are not assigned to one of these IDs. relayIDSet map[v2.RelayKey]struct{} + + // fetchTimeout is the maximum time to wait for a metadata fetch operation to complete. + fetchTimeout time.Duration } // newMetadataProvider creates a new metadataProvider. 
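The fetchFrames change above overlaps the proof fetch and the coefficient fetch, each under its own independent deadline. Reduced to its skeleton, with placeholder fetchers standing in for chunkReader.GetChunkProofs and chunkReader.GetChunkCoefficients and illustrative timeout values, the pattern looks like this:

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// Placeholders for the chunk store calls; signatures are simplified.
func fetchProofs(ctx context.Context) ([]byte, error)       { return []byte{0x01}, nil }
func fetchCoefficients(ctx context.Context) ([]byte, error) { return []byte{0x02}, nil }

func main() {
	base := context.Background() // plays the role of the provider's s.ctx

	wg := sync.WaitGroup{}
	wg.Add(1)

	var proofs []byte
	var proofsErr error
	go func() {
		// Proofs are fetched in the background under their own deadline.
		ctx, cancel := context.WithTimeout(base, 5*time.Second)
		defer func() {
			wg.Done()
			cancel()
		}()
		proofs, proofsErr = fetchProofs(ctx)
	}()

	// Coefficients are fetched on this goroutine under a separate deadline.
	ctx, cancel := context.WithTimeout(base, 20*time.Second)
	defer cancel()
	coefficients, coefficientsErr := fetchCoefficients(ctx)

	wg.Wait()
	fmt.Println(proofs, proofsErr, coefficients, coefficientsErr)
}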
@@ -48,7 +52,8 @@ func newMetadataProvider( metadataStore *blobstore.BlobMetadataStore, metadataCacheSize int, maxIOConcurrency int, - relayIDs []v2.RelayKey) (*metadataProvider, error) { + relayIDs []v2.RelayKey, + fetchTimeout time.Duration) (*metadataProvider, error) { relayIDSet := make(map[v2.RelayKey]struct{}, len(relayIDs)) for _, id := range relayIDs { @@ -60,6 +65,7 @@ func newMetadataProvider( logger: logger, metadataStore: metadataStore, relayIDSet: relayIDSet, + fetchTimeout: fetchTimeout, } metadataCache, err := cache.NewCachedAccessor[v2.BlobKey, *blobMetadata]( @@ -136,8 +142,12 @@ func (m *metadataProvider) GetMetadataForBlobs(ctx context.Context, keys []v2.Bl // fetchMetadata retrieves metadata about a blob. Fetches from the cache if available, otherwise from the store. func (m *metadataProvider) fetchMetadata(key v2.BlobKey) (*blobMetadata, error) { + + ctx, cancel := context.WithTimeout(m.ctx, m.fetchTimeout) + defer cancel() + // Retrieve the metadata from the store. - cert, fragmentInfo, err := m.metadataStore.GetBlobCertificate(m.ctx, key) + cert, fragmentInfo, err := m.metadataStore.GetBlobCertificate(ctx, key) if err != nil { return nil, fmt.Errorf("error retrieving metadata for blob %s: %w", key.Hex(), err) } diff --git a/relay/metadata_provider_test.go b/relay/metadata_provider_test.go index c090b92939..26e76cf6c9 100644 --- a/relay/metadata_provider_test.go +++ b/relay/metadata_provider_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/require" "math/rand" "testing" + "time" ) func TestGetNonExistentBlob(t *testing.T) { @@ -22,7 +23,14 @@ func TestGetNonExistentBlob(t *testing.T) { defer teardown() metadataStore := buildMetadataStore(t) - server, err := newMetadataProvider(context.Background(), logger, metadataStore, 1024*1024, 32, nil) + server, err := newMetadataProvider( + context.Background(), + logger, + metadataStore, + 1024*1024, + 32, + nil, + 10*time.Second) require.NoError(t, err) // Try to fetch a non-existent blobs @@ -80,7 +88,14 @@ func TestFetchingIndividualMetadata(t *testing.T) { require.Equal(t, fragmentSizeMap[blobKey], fragmentInfo.FragmentSizeBytes) } - server, err := newMetadataProvider(context.Background(), logger, metadataStore, 1024*1024, 32, nil) + server, err := newMetadataProvider( + context.Background(), + logger, + metadataStore, + 1024*1024, + 32, + nil, + 10*time.Second) require.NoError(t, err) // Fetch the metadata from the server. @@ -154,7 +169,14 @@ func TestBatchedFetch(t *testing.T) { require.Equal(t, fragmentSizeMap[blobKey], fragmentInfo.FragmentSizeBytes) } - server, err := newMetadataProvider(context.Background(), logger, metadataStore, 1024*1024, 32, nil) + server, err := newMetadataProvider( + context.Background(), + logger, + metadataStore, + 1024*1024, + 32, + nil, + 10*time.Second) require.NoError(t, err) // Each iteration, choose a random subset of the keys to fetch @@ -247,7 +269,14 @@ func TestIndividualFetchWithSharding(t *testing.T) { require.Equal(t, fragmentSizeMap[blobKey], fragmentInfo.FragmentSizeBytes) } - server, err := newMetadataProvider(context.Background(), logger, metadataStore, 1024*1024, 32, shardList) + server, err := newMetadataProvider( + context.Background(), + logger, + metadataStore, + 1024*1024, + 32, + shardList, + 10*time.Second) require.NoError(t, err) // Fetch the metadata from the server. 
@@ -371,7 +400,14 @@ func TestBatchedFetchWithSharding(t *testing.T) { require.Equal(t, fragmentSizeMap[blobKey], fragmentInfo.FragmentSizeBytes) } - server, err := newMetadataProvider(context.Background(), logger, metadataStore, 1024*1024, 32, shardList) + server, err := newMetadataProvider( + context.Background(), + logger, + metadataStore, + 1024*1024, + 32, + shardList, + 10*time.Second) require.NoError(t, err) // Each iteration, choose two random keys to fetch. There will be a 25% chance that both blobs map to valid shards. diff --git a/relay/relay_test_utils.go b/relay/relay_test_utils.go index f850b65cc7..abaf8a4e45 100644 --- a/relay/relay_test_utils.go +++ b/relay/relay_test_utils.go @@ -3,15 +3,6 @@ package relay import ( "context" "fmt" - "log" - "math/big" - "os" - "path/filepath" - "runtime" - "strings" - "testing" - "time" - pbcommon "github.com/Layr-Labs/eigenda/api/grpc/common" pbcommonv2 "github.com/Layr-Labs/eigenda/api/grpc/common/v2" "github.com/Layr-Labs/eigenda/common" @@ -33,6 +24,13 @@ import ( "github.com/google/uuid" "github.com/ory/dockertest/v3" "github.com/stretchr/testify/require" + "log" + "math/big" + "os" + "path/filepath" + "runtime" + "strings" + "testing" ) var ( @@ -155,12 +153,10 @@ func buildBlobStore(t *testing.T, logger logging.Logger) *blobstore.BlobStore { func buildChunkStore(t *testing.T, logger logging.Logger) (chunkstore.ChunkReader, chunkstore.ChunkWriter) { cfg := aws.ClientConfig{ - Region: "us-east-1", - AccessKey: "localstack", - SecretAccessKey: "localstack", - EndpointURL: localstackHost, - FragmentWriteTimeout: time.Duration(10) * time.Second, - FragmentReadTimeout: time.Duration(10) * time.Second, + Region: "us-east-1", + AccessKey: "localstack", + SecretAccessKey: "localstack", + EndpointURL: localstackHost, } client, err := s3.NewClient(context.Background(), cfg, logger) @@ -178,7 +174,7 @@ func buildChunkStore(t *testing.T, logger logging.Logger) (chunkstore.ChunkReade func randomBlob(t *testing.T) (*v2.BlobHeader, []byte) { - data := tu.RandomBytes(225) // TODO talk to Ian about this + data := tu.RandomBytes(225) data = codec.ConvertByPaddingEmptyByte(data) commitments, err := prover.GetCommitments(data) diff --git a/relay/server.go b/relay/server.go index 550cd03fcd..6dee6817a1 100644 --- a/relay/server.go +++ b/relay/server.go @@ -101,12 +101,8 @@ type Config struct { // AuthenticationDisabled will disable authentication if set to true. AuthenticationDisabled bool - // TODO flagify - // The maximum time permitted for a GetChunks operation to complete. If zero then no timeout is enforced. - GetChunksTimeout time.Duration - - // The maximum time permitted for a GetBlob operation to complete. If zero then no timeout is enforced. - GetBlobTimeout time.Duration + // Timeouts contains configuration for relay timeouts. + Timeouts TimeoutConfig } // NewServer creates a new relay Server. 
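For anyone embedding the relay, the new Timeouts field added above is populated like any other part of the Config. A sketch of the wiring (hypothetical caller; the values mirror the defaults the server tests in this patch use, and TimeoutConfig itself is defined in relay/timeout_config.go below):

package main

import (
	"time"

	"github.com/Layr-Labs/eigenda/relay"
)

func main() {
	cfg := relay.Config{
		// ... other Config fields elided ...
		Timeouts: relay.TimeoutConfig{
			GetBlobTimeout:                 10 * time.Second,
			GetChunksTimeout:               10 * time.Second,
			InternalGetMetadataTimeout:     10 * time.Second,
			InternalGetBlobTimeout:         10 * time.Second,
			InternalGetProofsTimeout:       10 * time.Second,
			InternalGetCoefficientsTimeout: 10 * time.Second,
		},
	}
	_ = cfg
}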
@@ -125,7 +121,8 @@ func NewServer(
 		metadataStore,
 		config.MetadataCacheSize,
 		config.MetadataMaxConcurrency,
-		config.RelayIDs)
+		config.RelayIDs,
+		config.Timeouts.InternalGetMetadataTimeout)
 	if err != nil {
 		return nil, fmt.Errorf("error creating metadata provider: %w", err)
 	}
@@ -135,7 +132,8 @@
 		logger,
 		blobStore,
 		config.BlobCacheSize,
-		config.BlobMaxConcurrency)
+		config.BlobMaxConcurrency,
+		config.Timeouts.InternalGetBlobTimeout)
 	if err != nil {
 		return nil, fmt.Errorf("error creating blob provider: %w", err)
 	}
@@ -145,7 +143,9 @@
 		logger,
 		chunkReader,
 		config.ChunkCacheSize,
-		config.ChunkMaxConcurrency)
+		config.ChunkMaxConcurrency,
+		config.Timeouts.InternalGetProofsTimeout,
+		config.Timeouts.InternalGetCoefficientsTimeout)
 	if err != nil {
 		return nil, fmt.Errorf("error creating chunk provider: %w", err)
 	}
@@ -169,9 +169,9 @@
 
 // GetBlob retrieves a blob stored by the relay.
 func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.GetBlobReply, error) {
-	if s.config.GetBlobTimeout > 0 {
+	if s.config.Timeouts.GetBlobTimeout > 0 {
 		var cancel context.CancelFunc
-		ctx, cancel = context.WithTimeout(ctx, s.config.GetBlobTimeout)
+		ctx, cancel = context.WithTimeout(ctx, s.config.Timeouts.GetBlobTimeout)
 		defer cancel()
 	}
 
diff --git a/relay/server_test.go b/relay/server_test.go
index 96d465a788..06063d9a74 100644
--- a/relay/server_test.go
+++ b/relay/server_test.go
@@ -5,6 +5,7 @@ import (
 	"github.com/Layr-Labs/eigenda/relay/limiter"
 	"math/rand"
 	"testing"
+	"time"
 
 	pb "github.com/Layr-Labs/eigenda/api/grpc/relay"
 	"github.com/Layr-Labs/eigenda/common"
@@ -46,6 +47,14 @@ func defaultConfig() *Config {
 			MaxConcurrentGetChunkOpsClient: 1,
 		},
 		AuthenticationDisabled: true,
+		Timeouts: TimeoutConfig{
+			GetBlobTimeout:                 10 * time.Second,
+			GetChunksTimeout:               10 * time.Second,
+			InternalGetMetadataTimeout:     10 * time.Second,
+			InternalGetBlobTimeout:         10 * time.Second,
+			InternalGetProofsTimeout:       10 * time.Second,
+			InternalGetCoefficientsTimeout: 10 * time.Second,
+		},
 	}
 }
 
diff --git a/relay/timeout_config.go b/relay/timeout_config.go
new file mode 100644
index 0000000000..c32475fa69
--- /dev/null
+++ b/relay/timeout_config.go
@@ -0,0 +1,28 @@
+package relay
+
+import "time"
+
+// TODO flagify
+
+// TimeoutConfig encapsulates the timeout configuration for the relay server.
+type TimeoutConfig struct {
+
+	// The maximum time permitted for a GetChunks gRPC call to complete. If zero then no timeout is enforced.
+	GetChunksTimeout time.Duration
+
+	// The maximum time permitted for a GetBlob gRPC call to complete. If zero then no timeout is enforced.
+	GetBlobTimeout time.Duration
+
+	// The maximum time permitted for a single request to the metadata store to fetch the metadata
+	// for an individual blob.
+	InternalGetMetadataTimeout time.Duration
+
+	// The maximum time permitted for a single request to the blob store to fetch a blob.
+	InternalGetBlobTimeout time.Duration
+
+	// The maximum time permitted for a single request to the chunk store to fetch chunk proofs.
+	InternalGetProofsTimeout time.Duration
+
+	// The maximum time permitted for a single request to the chunk store to fetch chunk coefficients.
+	InternalGetCoefficientsTimeout time.Duration
+}

From 0892c9cf3163e810c59de74fb5d0ea848e15b89d Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Wed, 20 Nov 2024 10:39:39 -0600
Subject: [PATCH 32/45] Add flags.
Signed-off-by: Cody Littley --- relay/cmd/config.go | 8 +++++++ relay/cmd/flags/flags.go | 48 ++++++++++++++++++++++++++++++++++++++++ relay/timeout_config.go | 2 -- 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/relay/cmd/config.go b/relay/cmd/config.go index 1795ab836e..8884e5c71c 100644 --- a/relay/cmd/config.go +++ b/relay/cmd/config.go @@ -81,6 +81,14 @@ func NewConfig(ctx *cli.Context) (Config, error) { }, AuthenticationTimeout: ctx.Duration(flags.AuthenticationTimeoutFlag.Name), AuthenticationDisabled: ctx.Bool(flags.AuthenticationDisabledFlag.Name), + Timeouts: relay.TimeoutConfig{ + GetChunksTimeout: ctx.Duration(flags.GetChunksTimeoutFlag.Name), + GetBlobTimeout: ctx.Duration(flags.GetBlobTimeoutFlag.Name), + InternalGetMetadataTimeout: ctx.Duration(flags.InternalGetMetadataTimeoutFlag.Name), + InternalGetBlobTimeout: ctx.Duration(flags.InternalGetBlobTimeoutFlag.Name), + InternalGetProofsTimeout: ctx.Duration(flags.InternalGetProofsTimeoutFlag.Name), + InternalGetCoefficientsTimeout: ctx.Duration(flags.InternalGetCoefficientsTimeoutFlag.Name), + }, }, EthClientConfig: geth.ReadEthClientConfig(ctx), IndexerPullInterval: ctx.Duration(flags.IndexerPullIntervalFlag.Name), diff --git a/relay/cmd/flags/flags.go b/relay/cmd/flags/flags.go index f5bbce330d..1d1af88e23 100644 --- a/relay/cmd/flags/flags.go +++ b/relay/cmd/flags/flags.go @@ -223,6 +223,48 @@ var ( Required: false, EnvVar: common.PrefixEnvVar(envVarPrefix, "AUTHENTICATION_DISABLED"), } + GetChunksTimeoutFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "get-chunks-timeout"), + Usage: "Timeout for GetChunks()", + EnvVar: common.PrefixEnvVar(envVarPrefix, "GET_CHUNKS_TIMEOUT"), + Required: false, + Value: 20 * time.Second, + } + GetBlobTimeoutFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "get-blob-timeout"), + Usage: "Timeout for GetBlob()", + EnvVar: common.PrefixEnvVar(envVarPrefix, "GET_BLOB_TIMEOUT"), + Required: false, + Value: 20 * time.Second, + } + InternalGetMetadataTimeoutFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "internal-get-metadata-timeout"), + Usage: "Timeout for internal metadata fetch", + EnvVar: common.PrefixEnvVar(envVarPrefix, "INTERNAL_GET_METADATA_TIMEOUT"), + Required: false, + Value: 5 * time.Second, + } + InternalGetBlobTimeoutFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "internal-get-blob-timeout"), + Usage: "Timeout for internal blob fetch", + EnvVar: common.PrefixEnvVar(envVarPrefix, "INTERNAL_GET_BLOB_TIMEOUT"), + Required: false, + Value: 20 * time.Second, + } + InternalGetProofsTimeoutFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "internal-get-proofs-timeout"), + Usage: "Timeout for internal proofs fetch", + EnvVar: common.PrefixEnvVar(envVarPrefix, "INTERNAL_GET_PROOFS_TIMEOUT"), + Required: false, + Value: 5 * time.Second, + } + InternalGetCoefficientsTimeoutFlag = cli.DurationFlag{ + Name: common.PrefixFlag(FlagPrefix, "internal-get-coefficients-timeout"), + Usage: "Timeout for internal coefficients fetch", + EnvVar: common.PrefixEnvVar(envVarPrefix, "INTERNAL_GET_COEFFICIENTS_TIMEOUT"), + Required: false, + Value: 20 * time.Second, + } ) var requiredFlags = []cli.Flag{ @@ -260,6 +302,12 @@ var optionalFlags = []cli.Flag{ GetChunkBytesBurstinessClientFlag, MaxConcurrentGetChunkOpsClientFlag, IndexerPullIntervalFlag, + GetChunksTimeoutFlag, + GetBlobTimeoutFlag, + InternalGetMetadataTimeoutFlag, + InternalGetBlobTimeoutFlag, + InternalGetProofsTimeoutFlag, + 
InternalGetCoefficientsTimeoutFlag, } var Flags []cli.Flag diff --git a/relay/timeout_config.go b/relay/timeout_config.go index c32475fa69..64c6be96ff 100644 --- a/relay/timeout_config.go +++ b/relay/timeout_config.go @@ -2,8 +2,6 @@ package relay import "time" -// TODO flagify - // TimeoutConfig encapsulates the timeout configuration for the relay server. type TimeoutConfig struct { From e6f4e8e4fa6d7a0a54a5ac19f4fd7501d9deb39b Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 10:55:07 -0600 Subject: [PATCH 33/45] Unit tests. Signed-off-by: Cody Littley --- relay/cache/cached_accessor_test.go | 180 +++++++++++++++++++++++++++- 1 file changed, 179 insertions(+), 1 deletion(-) diff --git a/relay/cache/cached_accessor_test.go b/relay/cache/cached_accessor_test.go index a59b4835b3..9048e3d88a 100644 --- a/relay/cache/cached_accessor_test.go +++ b/relay/cache/cached_accessor_test.go @@ -312,4 +312,182 @@ func TestConcurrencyLimiter(t *testing.T) { wg.Wait() } -// TODO test what happens when the context is cancelled +func TestOriginalRequesterTimesOut(t *testing.T) { + tu.InitializeRandom() + + dataSize := 1024 + + baseData := make(map[int]string) + for i := 0; i < dataSize; i++ { + baseData[i] = tu.RandomString(10) + } + + accessorLock := sync.RWMutex{} + cacheMissCount := atomic.Uint64{} + accessor := func(key int) (*string, error) { + + // Intentionally block if accessorLock is held by the outside scope. + // Used to provoke specific race conditions. + accessorLock.Lock() + defer accessorLock.Unlock() + + cacheMissCount.Add(1) + + str := baseData[key] + return &str, nil + } + cacheSize := rand.Intn(dataSize) + 1 + + ca, err := NewCachedAccessor(cacheSize, 0, accessor) + require.NoError(t, err) + + // Lock the accessor. This will cause all cache misses to block. + accessorLock.Lock() + + // Start several goroutines that will attempt to access the same key. + wg := sync.WaitGroup{} + wg.Add(10) + errCount := atomic.Uint64{} + for i := 0; i < 10; i++ { + + var ctx context.Context + if i == 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(context.Background(), 1*time.Millisecond) + defer cancel() + } else { + ctx = context.Background() + } + + go func() { + defer wg.Done() + value, err := ca.Get(ctx, 0) + + if err != nil { + errCount.Add(1) + } else { + require.Equal(t, baseData[0], *value) + } + }() + + if i == 0 { + // Give the thread with the small timeout a chance to start. Although this sleep statement is + // not required for the test to pass, it makes it much more likely for this test to exercise + // the intended code pathway. + time.Sleep(100 * time.Millisecond) + } + } + + // Unlock the accessor. This will allow the goroutines to proceed. + accessorLock.Unlock() + + // Wait for the goroutines to finish. + wg.Wait() + + // Only one of the goroutines should have called into the accessor. + require.Equal(t, uint64(1), cacheMissCount.Load()) + + // At most, one goroutine should have timed out. + require.True(t, errCount.Load() <= 1) + + // Fetching the key again should not result in a cache miss. + value, err := ca.Get(context.Background(), 0) + require.NoError(t, err) + require.Equal(t, baseData[0], *value) + require.Equal(t, uint64(1), cacheMissCount.Load()) + + // The internal lookupsInProgress map should no longer contain the key. 
+ require.Equal(t, 0, len(ca.(*cachedAccessor[int, *string]).lookupsInProgress)) +} + +func TestSecondaryRequesterTimesOut(t *testing.T) { + tu.InitializeRandom() + + dataSize := 1024 + + baseData := make(map[int]string) + for i := 0; i < dataSize; i++ { + baseData[i] = tu.RandomString(10) + } + + accessorLock := sync.RWMutex{} + cacheMissCount := atomic.Uint64{} + accessor := func(key int) (*string, error) { + + // Intentionally block if accessorLock is held by the outside scope. + // Used to provoke specific race conditions. + accessorLock.Lock() + defer accessorLock.Unlock() + + cacheMissCount.Add(1) + + str := baseData[key] + return &str, nil + } + cacheSize := rand.Intn(dataSize) + 1 + + ca, err := NewCachedAccessor(cacheSize, 0, accessor) + require.NoError(t, err) + + // Lock the accessor. This will cause all cache misses to block. + accessorLock.Lock() + + // Start several goroutines that will attempt to access the same key. + wg := sync.WaitGroup{} + wg.Add(10) + errCount := atomic.Uint64{} + for i := 0; i < 10; i++ { + + var ctx context.Context + if i == 1 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(context.Background(), 1*time.Millisecond) + defer cancel() + } else { + ctx = context.Background() + } + + go func() { + defer wg.Done() + value, err := ca.Get(ctx, 0) + + if err != nil { + errCount.Add(1) + } else { + require.Equal(t, baseData[0], *value) + } + }() + + if i == 0 { + // Give the thread with the context that won't time out a chance to start. Although this sleep statement is + // not required for the test to pass, it makes it much more likely for this test to exercise + // the intended code pathway. + time.Sleep(100 * time.Millisecond) + } + } + + // Give context a chance to time out. Although this sleep statement is not required for the test to pass, it makes + // it much more likely for this test to exercise the intended code pathway. + time.Sleep(100 * time.Millisecond) + + // Unlock the accessor. This will allow the goroutines to proceed. + accessorLock.Unlock() + + // Wait for the goroutines to finish. + wg.Wait() + + // Only one of the goroutines should have called into the accessor. + require.Equal(t, uint64(1), cacheMissCount.Load()) + + // At most, one goroutine should have timed out. + require.True(t, errCount.Load() <= 1) + + // Fetching the key again should not result in a cache miss. + value, err := ca.Get(context.Background(), 0) + require.NoError(t, err) + require.Equal(t, baseData[0], *value) + require.Equal(t, uint64(1), cacheMissCount.Load()) + + // The internal lookupsInProgress map should no longer contain the key. + require.Equal(t, 0, len(ca.(*cachedAccessor[int, *string]).lookupsInProgress)) +} From 7f95615b12fc82e624edbc6fe64cca787ed687a5 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 11:41:35 -0600 Subject: [PATCH 34/45] Added base framework. 
Signed-off-by: Cody Littley --- common/queue/linked_queue.go | 52 +++++++++++++++++++ common/queue/linked_queue_test.go | 1 + common/queue/queue.go | 18 +++++++ relay/auth/authenticator.go | 2 +- relay/cache/cache.go | 30 +++++++++++ relay/cache/fifo-cache.go | 83 +++++++++++++++++++++++++++++++ 6 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 common/queue/linked_queue.go create mode 100644 common/queue/linked_queue_test.go create mode 100644 common/queue/queue.go create mode 100644 relay/cache/cache.go create mode 100644 relay/cache/fifo-cache.go diff --git a/common/queue/linked_queue.go b/common/queue/linked_queue.go new file mode 100644 index 0000000000..2142759e77 --- /dev/null +++ b/common/queue/linked_queue.go @@ -0,0 +1,52 @@ +package queue + +var _ Queue[string] = &LinkedQueue[string]{} + +// LinkedQueue is a queue that uses a linked list to store values. It is not thread safe. +type LinkedQueue[T any] struct { + front *node[T] + back *node[T] + size int +} + +// node is a single element in the linked list. +type node[T any] struct { + value T + next *node[T] +} + +func (l *LinkedQueue[T]) Push(value T) { + if l.size == 0 { + l.front = &node[T]{value: value} + l.back = l.front + } else { + n := &node[T]{value: value} + l.back.next = n + l.back = n + } + l.size++ +} + +func (l *LinkedQueue[T]) Pop() (T, bool) { + if l.size == 0 { + var zero T + return zero, false + } + + value := l.front.value + l.front = l.front.next + l.size-- + return value, true +} + +func (l *LinkedQueue[T]) Peek() (T, bool) { + if l.size == 0 { + var zero T + return zero, false + } + return l.front.value, true +} + +func (l *LinkedQueue[T]) Size() int { + return l.size +} diff --git a/common/queue/linked_queue_test.go b/common/queue/linked_queue_test.go new file mode 100644 index 0000000000..c969bad24a --- /dev/null +++ b/common/queue/linked_queue_test.go @@ -0,0 +1 @@ +package queue diff --git a/common/queue/queue.go b/common/queue/queue.go new file mode 100644 index 0000000000..8b605bd7f3 --- /dev/null +++ b/common/queue/queue.go @@ -0,0 +1,18 @@ +package queue + +// Queue is an interface for a generic queue. It's absurd there isn't an equivalent in the standard golang libraries. +type Queue[T any] interface { + // Push adds a value to the queue. + Push(value T) + + // Pop removes and returns the value at the front of the queue. + // If the queue is empty, the second return value will be false. + Pop() (T, bool) + + // Peek returns the value at the front of the queue without removing it. + // If the queue is empty, the second return value will be false. + Peek() (T, bool) + + // Size returns the number of values in the queue. + Size() int +} diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go index f8c9bdbfc7..d35660e086 100644 --- a/relay/auth/authenticator.go +++ b/relay/auth/authenticator.go @@ -36,7 +36,7 @@ type requestAuthenticator struct { authenticatedClients map[string]struct{} // authenticationTimeouts is a list of authentications that have been performed, along with their expiration times. - authenticationTimeouts []*authenticationTimeout + authenticationTimeouts []*authenticationTimeout // TODO use a queue here why not // authenticationTimeoutDuration is the duration for which an auth is valid. // If this is zero, then auth saving is disabled, and each request will be authenticated independently. 
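A quick usage sketch for the queue added above: the zero value of LinkedQueue is ready to use, and values come back in insertion order.

package main

import (
	"fmt"

	"github.com/Layr-Labs/eigenda/common/queue"
)

func main() {
	var q queue.LinkedQueue[string]

	q.Push("first")
	q.Push("second")

	front, ok := q.Peek() // "first", true; Peek does not remove the value
	fmt.Println(front, ok, q.Size())

	for q.Size() > 0 {
		value, _ := q.Pop() // FIFO order: "first", then "second"
		fmt.Println(value)
	}
}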
diff --git a/relay/cache/cache.go b/relay/cache/cache.go
new file mode 100644
index 0000000000..034a5fdd20
--- /dev/null
+++ b/relay/cache/cache.go
@@ -0,0 +1,30 @@
+package cache
+
+// WeightCalculator is a function that calculates the weight of a key-value pair in a Cache.
+// By default, the weight of a key-value pair is 1. Cache capacity is always specified in terms of
+// the weight of the key-value pairs it can hold, rather than the number of key-value pairs.
+//
+// Unless otherwise noted, Cache implementations are not required to be thread safe.
+type WeightCalculator[K comparable, V any] func(key K, value V) uint64
+
+// Cache is an interface for a generic cache.
+type Cache[K comparable, V any] interface {
+	// Get returns the value associated with the key, and a boolean indicating whether the key was found in the cache.
+	Get(key K) (V, bool)
+
+	// Put adds a key-value pair to the cache. After this operation, values may be dropped if the total weight
+	// exceeds the configured maximum weight. The new value is ignored if its weight alone exceeds the
+	// maximum weight of the cache.
+	Put(key K, value V)
+
+	// WithWeightCalculator sets the weight calculator for the cache. May only be called
+	// when the cache is empty. The weight calculator should be an idempotent function that
+	// always returns the same output given the same input.
+	WithWeightCalculator(weightCalculator WeightCalculator[K, V]) error
+
+	// Size returns the number of key-value pairs in the cache.
+	Size() int
+
+	// Weight returns the total weight of the key-value pairs in the cache.
+	Weight() uint64
+}
diff --git a/relay/cache/fifo-cache.go b/relay/cache/fifo-cache.go
new file mode 100644
index 0000000000..dc3388e084
--- /dev/null
+++ b/relay/cache/fifo-cache.go
@@ -0,0 +1,83 @@
+package cache
+
+import (
+	"errors"
+	"github.com/Layr-Labs/eigenda/common/queue"
+)
+
+var _ Cache[string, string] = &FIFOCache[string, string]{}
+
+// FIFOCache is a cache that evicts the least recently added item when the cache is full. Useful for situations
+// where time of addition is a better predictor of future access than time of most recent access.
+type FIFOCache[K comparable, V any] struct {
+	weightCalculator WeightCalculator[K, V]
+
+	currentWeight   uint64
+	maxWeight       uint64
+	data            map[K]V
+	expirationQueue queue.Queue[K]
+}
+
+// NewFIFOCache creates a new FIFOCache.
+func NewFIFOCache[K comparable, V any](maxWeight uint64) *FIFOCache[K, V] { + defaultWeightCalculator := func(key K, value V) uint64 { + return uint64(1) + } + + return &FIFOCache[K, V]{ + maxWeight: maxWeight, + data: make(map[K]V), + weightCalculator: defaultWeightCalculator, + } +} + +func (f *FIFOCache[K, V]) Get(key K) (V, bool) { + val, ok := f.data[key] + return val, ok +} + +func (f *FIFOCache[K, V]) Put(key K, value V) { + weight := f.weightCalculator(key, value) + if weight > f.maxWeight { + // this item won't fit in the cache no matter what we evict + return + } + + old, ok := f.data[key] + f.currentWeight += weight + f.data[key] = value + if ok { + oldWeight := f.weightCalculator(key, old) + f.currentWeight -= oldWeight + } else { + f.expirationQueue.Push(key) + } + + if f.currentWeight < f.maxWeight { + // no need to evict anything + return + } + + for f.currentWeight > f.maxWeight { + keyToEvict, _ := f.expirationQueue.Pop() + weightToEvict := f.weightCalculator(keyToEvict, f.data[keyToEvict]) + delete(f.data, keyToEvict) + f.currentWeight -= weightToEvict + } +} + +func (f *FIFOCache[K, V]) WithWeightCalculator(weightCalculator WeightCalculator[K, V]) error { + if f.Size() > 0 { + return errors.New("cannot set weight calculator on non-empty cache") + } + f.weightCalculator = weightCalculator + return nil +} + +func (f *FIFOCache[K, V]) Size() int { + return len(f.data) +} + +func (f *FIFOCache[K, V]) Weight() uint64 { + return f.currentWeight +} From e11bcf61aee374e87947f8f8d895d39fcf65a819 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 11:53:40 -0600 Subject: [PATCH 35/45] Add unit test for queue. Signed-off-by: Cody Littley --- common/queue/linked_queue_test.go | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/common/queue/linked_queue_test.go b/common/queue/linked_queue_test.go index c969bad24a..7e790ef8f6 100644 --- a/common/queue/linked_queue_test.go +++ b/common/queue/linked_queue_test.go @@ -1 +1,58 @@ package queue + +import ( + tu "github.com/Layr-Labs/eigenda/common/testutils" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" + "testing" +) + +func TestEmptyQueue(t *testing.T) { + var q LinkedQueue[int] + require.Equal(t, 0, q.Size()) + + next, ok := q.Peek() + require.False(t, ok) + require.Equal(t, 0, next) + + next, ok = q.Pop() + require.False(t, ok) + require.Equal(t, 0, next) + + require.Equal(t, 0, q.Size()) +} + +func TestRandomOperations(t *testing.T) { + tu.InitializeRandom() + + var q LinkedQueue[int] + expectedValues := make([]int, 0) + + for i := 0; i < 1000; i++ { + if rand.Int()%2 == 0 || len(expectedValues) == 0 { + // push an item + itemToPush := rand.Int() + q.Push(itemToPush) + expectedValues = append(expectedValues, itemToPush) + } else { + // pop an item + + next, ok := q.Pop() + expectedNext := expectedValues[0] + expectedValues = expectedValues[1:] + + require.True(t, ok) + require.Equal(t, expectedNext, next) + } + + require.Equal(t, len(expectedValues), q.Size()) + + next, ok := q.Peek() + if len(expectedValues) == 0 { + require.False(t, ok) + } else { + require.True(t, ok) + require.Equal(t, expectedValues[0], next) + } + } +} From 0948a5c85ef8667256e91969a3b0529523960f55 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 20 Nov 2024 12:32:40 -0600 Subject: [PATCH 36/45] Unit tests. 
Signed-off-by: Cody Littley --- relay/cache/fifo-cache.go | 1 + relay/cache/fifo_cache_test.go | 142 +++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 relay/cache/fifo_cache_test.go diff --git a/relay/cache/fifo-cache.go b/relay/cache/fifo-cache.go index dc3388e084..1b994ae25e 100644 --- a/relay/cache/fifo-cache.go +++ b/relay/cache/fifo-cache.go @@ -28,6 +28,7 @@ func NewFIFOCache[K comparable, V any](maxWeight uint64) *FIFOCache[K, V] { maxWeight: maxWeight, data: make(map[K]V), weightCalculator: defaultWeightCalculator, + expirationQueue: &queue.LinkedQueue[K]{}, } } diff --git a/relay/cache/fifo_cache_test.go b/relay/cache/fifo_cache_test.go new file mode 100644 index 0000000000..e56b1db055 --- /dev/null +++ b/relay/cache/fifo_cache_test.go @@ -0,0 +1,142 @@ +package cache + +import ( + tu "github.com/Layr-Labs/eigenda/common/testutils" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" + "testing" +) + +func TestExpirationOrder(t *testing.T) { + tu.InitializeRandom() + + maxWeight := uint64(10 + rand.Intn(10)) + c := NewFIFOCache[int, int](maxWeight) + + require.Equal(t, uint64(0), c.Weight()) + require.Equal(t, 0, c.Size()) + + expectedValues := make(map[int]int) + + // Fill up the cache. Everything should have weight 1. + for i := 1; i <= int(maxWeight); i++ { + + value := rand.Int() + expectedValues[i] = value + + // The value shouldn't be present yet + v, ok := c.Get(i) + require.False(t, ok) + require.Equal(t, 0, v) + + c.Put(i, value) + + require.Equal(t, uint64(i), c.Weight()) + require.Equal(t, i, c.Size()) + } + + // Verify that all expected values are present. + for k, v := range expectedValues { + value, ok := c.Get(k) + require.True(t, ok) + require.Equal(t, v, value) + } + + // Push the old values out of the queue one at a time. + for i := 1; i <= int(maxWeight); i++ { + value := rand.Int() + expectedValues[-i] = value + delete(expectedValues, i) + + // The value shouldn't be present yet + v, ok := c.Get(-i) + require.False(t, ok) + require.Equal(t, 0, v) + + c.Put(-i, value) + + require.Equal(t, maxWeight, c.Weight()) + require.Equal(t, int(maxWeight), c.Size()) + + // verify that the purged value is specifically not present + value, ok = c.Get(i) + require.False(t, ok) + + // verify that only the expected values have been purged. Has the added benefit of randomly + // reading all the values in the cache, which for a FIFO cache should not influence the order + // that we purge values. + for kk, vv := range expectedValues { + value, ok = c.Get(kk) + require.True(t, ok) + require.Equal(t, vv, value) + } + } +} + +func TestWeightedValues(t *testing.T) { + tu.InitializeRandom() + + maxWeight := uint64(100 + rand.Intn(100)) + + // For this test, weight is simply the key. 
+ weightCalculator := func(key int, value int) uint64 { + return uint64(key) + } + + c := NewFIFOCache[int, int](maxWeight) + err := c.WithWeightCalculator(weightCalculator) + require.NoError(t, err) + + expectedValues := make(map[int]int) + + require.Equal(t, uint64(0), c.Weight()) + require.Equal(t, 0, c.Size()) + + highestUndeletedKey := 0 + expectedWeight := uint64(0) + for nextKey := 0; nextKey <= int(maxWeight); nextKey++ { + value := rand.Int() + c.Put(nextKey, value) + expectedValues[nextKey] = value + expectedWeight += uint64(nextKey) + + // simulate the expected removal + for expectedWeight > maxWeight { + delete(expectedValues, highestUndeletedKey) + expectedWeight -= uint64(highestUndeletedKey) + highestUndeletedKey++ + } + + require.Equal(t, expectedWeight, c.Weight()) + require.Equal(t, len(expectedValues), c.Size()) + + // Update a random existing key. Shouldn't affect the weight or removal order. + for k, _ := range expectedValues { + value = rand.Int() + c.Put(k, value) + expectedValues[k] = value + break + } + + // verify that all expected values are present + for k, v := range expectedValues { + var ok bool + value, ok = c.Get(k) + require.True(t, ok) + require.Equal(t, v, value) + } + } + + // Attempting to insert a value that exceeds the max weight should have no effect. + c.Put(int(maxWeight)+1, rand.Int()) + + for k, v := range expectedValues { + value, ok := c.Get(k) + require.True(t, ok) + require.Equal(t, v, value) + } + + // Sanity check, attempting to update the weight calculator function at this point should fail. + err = c.WithWeightCalculator(weightCalculator) + require.Error(t, err) +} From 3f06fff822ac36362da4d44d78254daf33db8a26 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Thu, 21 Nov 2024 09:16:49 -0600 Subject: [PATCH 37/45] fix bugs Signed-off-by: Cody Littley --- relay/auth/authenticator.go | 8 +++++--- relay/server.go | 9 +++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go index a85c1f4865..5a4f4f9425 100644 --- a/relay/auth/authenticator.go +++ b/relay/auth/authenticator.go @@ -17,6 +17,7 @@ type RequestAuthenticator interface { // The origin is the address of the peer that sent the request. This may be used to cache auth results // in order to save server resources. AuthenticateGetChunksRequest( + ctx context.Context, origin string, request *pb.GetChunksRequest, now time.Time) error @@ -96,6 +97,7 @@ func (a *requestAuthenticator) preloadCache() error { } func (a *requestAuthenticator) AuthenticateGetChunksRequest( + ctx context.Context, origin string, request *pb.GetChunksRequest, now time.Time) error { @@ -105,7 +107,7 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( return nil } - key, err := a.getOperatorKey(core.OperatorID(request.OperatorId)) + key, err := a.getOperatorKey(ctx, core.OperatorID(request.OperatorId)) if err != nil { return fmt.Errorf("failed to get operator key: %w", err) } @@ -131,7 +133,7 @@ func (a *requestAuthenticator) AuthenticateGetChunksRequest( } // getOperatorKey returns the public key of the operator with the given ID, caching the result. 
-func (a *requestAuthenticator) getOperatorKey(operatorID core.OperatorID) (*core.G2Point, error) { +func (a *requestAuthenticator) getOperatorKey(ctx context.Context, operatorID core.OperatorID) (*core.G2Point, error) { key, ok := a.keyCache.Get(operatorID) if ok { return key, nil @@ -141,7 +143,7 @@ func (a *requestAuthenticator) getOperatorKey(operatorID core.OperatorID) (*core if err != nil { return nil, fmt.Errorf("failed to get current block number: %w", err) } - operators, err := a.ics.GetIndexedOperators(context.Background(), blockNumber) + operators, err := a.ics.GetIndexedOperators(ctx, blockNumber) if err != nil { return nil, fmt.Errorf("failed to get operators: %w", err) } diff --git a/relay/server.go b/relay/server.go index 7df48b0e2f..7a5445148d 100644 --- a/relay/server.go +++ b/relay/server.go @@ -179,7 +179,7 @@ func NewServer( // GetBlob retrieves a blob stored by the relay. func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.GetBlobReply, error) { - if s.config.Timeouts.GetChunksTimeout > 0 { + if s.config.Timeouts.GetBlobTimeout > 0 { var cancel context.CancelFunc ctx, cancel = context.WithTimeout(ctx, s.config.Timeouts.GetBlobTimeout) defer cancel() @@ -226,6 +226,11 @@ func (s *Server) GetBlob(ctx context.Context, request *pb.GetBlobRequest) (*pb.G // GetChunks retrieves chunks from blobs stored by the relay. func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (*pb.GetChunksReply, error) { + if s.config.Timeouts.GetChunksTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, s.config.Timeouts.GetChunksTimeout) + defer cancel() + } if len(request.ChunkRequests) <= 0 { return nil, fmt.Errorf("no chunk requests provided") @@ -242,7 +247,7 @@ func (s *Server) GetChunks(ctx context.Context, request *pb.GetChunksRequest) (* } clientAddress := client.Addr.String() - err := s.authenticator.AuthenticateGetChunksRequest(clientAddress, request, time.Now()) + err := s.authenticator.AuthenticateGetChunksRequest(ctx, clientAddress, request, time.Now()) if err != nil { return nil, fmt.Errorf("auth failed: %w", err) } From 43d8c911404b959191d37fd2ca6d42e18973a2cc Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Thu, 21 Nov 2024 09:24:04 -0600 Subject: [PATCH 38/45] lint Signed-off-by: Cody Littley --- relay/auth/authenticator_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/relay/auth/authenticator_test.go b/relay/auth/authenticator_test.go index debcbccc61..11d7b3d18e 100644 --- a/relay/auth/authenticator_test.go +++ b/relay/auth/authenticator_test.go @@ -69,6 +69,7 @@ func TestValidRequest(t *testing.T) { now := time.Now() err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", request, now) @@ -83,12 +84,14 @@ func TestValidRequest(t *testing.T) { start := now for now.Before(start.Add(timeout)) { err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", invalidRequest, now) require.NoError(t, err) err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "baz", invalidRequest, now) @@ -99,6 +102,7 @@ func TestValidRequest(t *testing.T) { // After the timeout elapses, new requests should trigger authentication. 
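As background for the timeout plumbing in this patch: the guards added to GetBlob and GetChunks in server.go share one shape, which in isolation looks like the following sketch (hypothetical helper name; a zero timeout means no deadline is applied). The test continues below.

package main

import (
	"context"
	"fmt"
	"time"
)

// withOptionalTimeout applies a deadline to ctx only when timeout > 0,
// matching the convention in server.go where a zero timeout disables it.
func withOptionalTimeout(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
	if timeout > 0 {
		return context.WithTimeout(ctx, timeout)
	}
	return ctx, func() {}
}

func main() {
	ctx, cancel := withOptionalTimeout(context.Background(), 0)
	defer cancel()
	_, hasDeadline := ctx.Deadline()
	fmt.Println(hasDeadline) // false: zero timeout leaves ctx unbounded
}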
err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", invalidRequest, now) @@ -132,6 +136,7 @@ func TestAuthenticationSavingDisabled(t *testing.T) { now := time.Now() err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", request, now) @@ -144,6 +149,7 @@ func TestAuthenticationSavingDisabled(t *testing.T) { invalidRequest.OperatorSignature = signature // the previous signature is invalid here err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", invalidRequest, now) @@ -174,6 +180,7 @@ func TestNonExistingClient(t *testing.T) { request.OperatorId = invalidOperatorID err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", request, time.Now()) @@ -205,6 +212,7 @@ func TestBadSignature(t *testing.T) { now := time.Now() err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", request, now) @@ -217,6 +225,7 @@ func TestBadSignature(t *testing.T) { request.OperatorSignature[0] = request.OperatorSignature[0] ^ 1 err = authenticator.AuthenticateGetChunksRequest( + context.Background(), "foobar", request, now) From 0903529e1377861499551cb9e9a04d560794f9c7 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Thu, 21 Nov 2024 09:32:46 -0600 Subject: [PATCH 39/45] Rename cached accessor to cache accessor. Signed-off-by: Cody Littley --- relay/blob_provider.go | 2 +- .../{cached_accessor.go => cache_accessor.go} | 24 +++++++++---------- ...ccessor_test.go => cache_accessor_test.go} | 8 +++---- relay/cache/fifo_cache_test.go | 4 ++-- relay/chunk_provider.go | 2 +- relay/metadata_provider.go | 2 +- 6 files changed, 21 insertions(+), 21 deletions(-) rename relay/cache/{cached_accessor.go => cache_accessor.go} (86%) rename relay/cache/{cached_accessor_test.go => cache_accessor_test.go} (97%) diff --git a/relay/blob_provider.go b/relay/blob_provider.go index 9b9863bfda..8c4ce98bdd 100644 --- a/relay/blob_provider.go +++ b/relay/blob_provider.go @@ -20,7 +20,7 @@ type blobProvider struct { blobStore *blobstore.BlobStore // blobCache is an LRU cache of blobs. - blobCache cache.CachedAccessor[v2.BlobKey, []byte] + blobCache cache.CacheAccessor[v2.BlobKey, []byte] // fetchTimeout is the maximum time to wait for a blob fetch operation to complete. fetchTimeout time.Duration diff --git a/relay/cache/cached_accessor.go b/relay/cache/cache_accessor.go similarity index 86% rename from relay/cache/cached_accessor.go rename to relay/cache/cache_accessor.go index 73ca575e4e..b8b9c8d3cd 100644 --- a/relay/cache/cached_accessor.go +++ b/relay/cache/cache_accessor.go @@ -6,16 +6,16 @@ import ( "sync" ) -// CachedAccessor is an interface for accessing a resource that is cached. It assumes that cache misses +// CacheAccessor is an interface for accessing a resource that is cached. It assumes that cache misses // are expensive, and prevents multiple concurrent cache misses for the same key. -type CachedAccessor[K comparable, V any] interface { +type CacheAccessor[K comparable, V any] interface { // Get returns the value for the given key. If the value is not in the cache, it will be fetched using the Accessor. // If the context is cancelled, the function may abort early. If multiple goroutines request the same key, // cancellation of one request will not affect the others. Get(ctx context.Context, key K) (V, error) } -// Accessor is function capable of fetching a value from a resource. Used by CachedAccessor when there is a cache miss. 
+// Accessor is function capable of fetching a value from a resource. Used by CacheAccessor when there is a cache miss. type Accessor[K comparable, V any] func(key K) (V, error) // accessResult is a struct that holds the result of an Accessor call. @@ -28,15 +28,15 @@ type accessResult[V any] struct { err error } -var _ CachedAccessor[string, string] = &cachedAccessor[string, string]{} +var _ CacheAccessor[string, string] = &cacheAccessor[string, string]{} // Future work: the cache used in this implementation is suboptimal when storing items that have a large // variance in size. The current implementation uses a fixed size cache, which requires the cached to be // sized to the largest item that will be stored. This cache should be replaced with an implementation // whose size can be specified by memory footprint in bytes. -// cachedAccessor is an implementation of CachedAccessor. -type cachedAccessor[K comparable, V any] struct { +// cacheAccessor is an implementation of CacheAccessor. +type cacheAccessor[K comparable, V any] struct { // lookupsInProgress has an entry for each key that is currently being looked up via the accessor. The value // is written into the channel when it is eventually fetched. If a key is requested more than once while a @@ -57,7 +57,7 @@ type cachedAccessor[K comparable, V any] struct { accessor Accessor[K, V] } -// NewCachedAccessor creates a new CachedAccessor. The cacheSize parameter specifies the maximum number of items +// NewCachedAccessor creates a new CacheAccessor. The cacheSize parameter specifies the maximum number of items // that can be stored in the cache. The concurrencyLimit parameter specifies the maximum number of concurrent // lookups that can be in progress at any given time. If a greater number of lookups are requested, the excess // lookups will block until a lookup completes. If concurrencyLimit is zero, then no limits are imposed. The accessor @@ -65,7 +65,7 @@ type cachedAccessor[K comparable, V any] struct { func NewCachedAccessor[K comparable, V any]( cacheSize int, concurrencyLimit int, - accessor Accessor[K, V]) (CachedAccessor[K, V], error) { + accessor Accessor[K, V]) (CacheAccessor[K, V], error) { cache, err := lru.New[K, V](cacheSize) if err != nil { @@ -79,7 +79,7 @@ func NewCachedAccessor[K comparable, V any]( concurrencyLimiter = make(chan struct{}, concurrencyLimit) } - return &cachedAccessor[K, V]{ + return &cacheAccessor[K, V]{ cache: cache, concurrencyLimiter: concurrencyLimiter, accessor: accessor, @@ -95,7 +95,7 @@ func newAccessResult[V any]() *accessResult[V] { return result } -func (c *cachedAccessor[K, V]) Get(ctx context.Context, key K) (V, error) { +func (c *cacheAccessor[K, V]) Get(ctx context.Context, key K) (V, error) { c.cacheLock.Lock() // first, attempt to get the value from the cache @@ -126,7 +126,7 @@ func (c *cachedAccessor[K, V]) Get(ctx context.Context, key K) (V, error) { // waitForResult waits for the result of a lookup that was initiated by another requester and returns it // when it becomes is available. This method will return quickly if the provided context is cancelled. // Doing so does not disrupt the other requesters that are also waiting for this result. -func (c *cachedAccessor[K, V]) waitForResult(ctx context.Context, result *accessResult[V]) (V, error) { +func (c *cacheAccessor[K, V]) waitForResult(ctx context.Context, result *accessResult[V]) (V, error) { wgChan := make(chan struct{}, 1) go func() { // Wait inside this goroutine for select statement compatibility. 
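The body of waitForResult is only partially visible in the hunk above; the select-compatibility trick it relies on, reduced to its core, presumably looks like this sketch (not the patch's exact code):

package sketch

import (
	"context"
	"sync"
)

// waitOrCancel blocks until wg finishes or ctx is cancelled. wg.Wait is not
// selectable on its own, so a goroutine converts completion into a channel
// close; a caller that gives up early does not disturb other waiters.
func waitOrCancel(ctx context.Context, wg *sync.WaitGroup) error {
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()
	select {
	case <-done:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}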
@@ -146,7 +146,7 @@ func (c *cachedAccessor[K, V]) waitForResult(ctx context.Context, result *access // fetchResult fetches the value for the given key and returns it. If the context is cancelled before the value // is fetched, the function will return early. If the fetch is successful, the value will be added to the cache. -func (c *cachedAccessor[K, V]) fetchResult(ctx context.Context, key K, result *accessResult[V]) (V, error) { +func (c *cacheAccessor[K, V]) fetchResult(ctx context.Context, key K, result *accessResult[V]) (V, error) { // Perform the work in a background goroutine. This allows us to return early if the context is cancelled // without disrupting the fetch operation that other requesters may be waiting for. diff --git a/relay/cache/cached_accessor_test.go b/relay/cache/cache_accessor_test.go similarity index 97% rename from relay/cache/cached_accessor_test.go rename to relay/cache/cache_accessor_test.go index 9048e3d88a..01996222b2 100644 --- a/relay/cache/cached_accessor_test.go +++ b/relay/cache/cache_accessor_test.go @@ -184,7 +184,7 @@ func ParallelAccessTest(t *testing.T, sleepEnabled bool) { require.Equal(t, uint64(1), cacheMissCount.Load()) // The internal lookupsInProgress map should no longer contain the key. - require.Equal(t, 0, len(ca.(*cachedAccessor[int, *string]).lookupsInProgress)) + require.Equal(t, 0, len(ca.(*cacheAccessor[int, *string]).lookupsInProgress)) } func TestParallelAccess(t *testing.T) { @@ -253,7 +253,7 @@ func TestParallelAccessWithError(t *testing.T) { require.Equal(t, count+1, cacheMissCount.Load()) // The internal lookupsInProgress map should no longer contain the key. - require.Equal(t, 0, len(ca.(*cachedAccessor[int, *string]).lookupsInProgress)) + require.Equal(t, 0, len(ca.(*cacheAccessor[int, *string]).lookupsInProgress)) } func TestConcurrencyLimiter(t *testing.T) { @@ -397,7 +397,7 @@ func TestOriginalRequesterTimesOut(t *testing.T) { require.Equal(t, uint64(1), cacheMissCount.Load()) // The internal lookupsInProgress map should no longer contain the key. - require.Equal(t, 0, len(ca.(*cachedAccessor[int, *string]).lookupsInProgress)) + require.Equal(t, 0, len(ca.(*cacheAccessor[int, *string]).lookupsInProgress)) } func TestSecondaryRequesterTimesOut(t *testing.T) { @@ -489,5 +489,5 @@ func TestSecondaryRequesterTimesOut(t *testing.T) { require.Equal(t, uint64(1), cacheMissCount.Load()) // The internal lookupsInProgress map should no longer contain the key. - require.Equal(t, 0, len(ca.(*cachedAccessor[int, *string]).lookupsInProgress)) + require.Equal(t, 0, len(ca.(*cacheAccessor[int, *string]).lookupsInProgress)) } diff --git a/relay/cache/fifo_cache_test.go b/relay/cache/fifo_cache_test.go index e56b1db055..3cafc2b198 100644 --- a/relay/cache/fifo_cache_test.go +++ b/relay/cache/fifo_cache_test.go @@ -59,7 +59,7 @@ func TestExpirationOrder(t *testing.T) { require.Equal(t, int(maxWeight), c.Size()) // verify that the purged value is specifically not present - value, ok = c.Get(i) + _, ok = c.Get(i) require.False(t, ok) // verify that only the expected values have been purged. Has the added benefit of randomly @@ -111,7 +111,7 @@ func TestWeightedValues(t *testing.T) { require.Equal(t, len(expectedValues), c.Size()) // Update a random existing key. Shouldn't affect the weight or removal order. 
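To make the weighted eviction concrete (fixing maxWeight at 100 for illustration, though the test draws it randomly): pushing keys 0 through 13 accumulates weight 91; pushing 14 overshoots to 105, so Put evicts keys 0 through 3 from the front of the queue until the weight (99) is back under budget. A small replay of that bookkeeping follows; the test's update loop continues below.

package main

import "fmt"

func main() {
	// Replay of the weighted-FIFO bookkeeping with maxWeight fixed at 100
	// and weight(k) = k, mirroring Put's eviction loop.
	const maxWeight = 100
	weight, keys := uint64(0), []uint64{}
	for k := uint64(0); k <= 14; k++ {
		weight += k
		keys = append(keys, k)
		for weight > maxWeight { // evict oldest entries until under budget
			weight -= keys[0]
			keys = keys[1:]
		}
	}
	fmt.Println(keys[0], weight) // 4 99: keys 0-3 were evicted
}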
-	for k, _ := range expectedValues {
+	for k := range expectedValues {
 		value = rand.Int()
 		c.Put(k, value)
 		expectedValues[k] = value
diff --git a/relay/chunk_provider.go b/relay/chunk_provider.go
index 48ece7c3cd..61a9d973d4 100644
--- a/relay/chunk_provider.go
+++ b/relay/chunk_provider.go
@@ -20,7 +20,7 @@ type chunkProvider struct {
 
 	// metadataCache is an LRU cache of blob metadata. Each relay is authorized to serve data assigned to one or more
 	// relay IDs. Blobs that do not belong to one of the relay IDs assigned to this server will not be in the cache.
-	frameCache cache.CachedAccessor[blobKeyWithMetadata, []*encoding.Frame]
+	frameCache cache.CacheAccessor[blobKeyWithMetadata, []*encoding.Frame]
 
 	// chunkReader is used to read chunks from the chunk store.
 	chunkReader chunkstore.ChunkReader
diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go
index 31a2e5bda4..9f7eced495 100644
--- a/relay/metadata_provider.go
+++ b/relay/metadata_provider.go
@@ -36,7 +36,7 @@ type metadataProvider struct {
 
 	// metadataCache is an LRU cache of blob metadata. Blobs that do not belong to one of the relay shards
 	// assigned to this server will not be in the cache.
-	metadataCache cache.CachedAccessor[v2.BlobKey, *blobMetadata]
+	metadataCache cache.CacheAccessor[v2.BlobKey, *blobMetadata]
 
 	// relayIDSet is the set of relay IDs assigned to this relay. This relay will refuse to serve metadata for blobs
 	// that are not assigned to one of these IDs.

From 04480ff925c4918ecb95759c4d796d9106878a82 Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Thu, 21 Nov 2024 10:16:47 -0600
Subject: [PATCH 40/45] Integrate new cache.

Signed-off-by: Cody Littley
---
 relay/blob_provider.go             | 18 +++++++++++++++---
 relay/blob_provider_test.go        |  4 ++--
 relay/cache/cache_accessor.go      | 18 ++++++------------
 relay/cache/cache_accessor_test.go | 27 ++++++++++++++++++++-------
 relay/chunk_provider.go            | 20 ++++++++++++++++----
 relay/chunk_provider_test.go       |  4 ++--
 relay/cmd/config.go                |  4 ++--
 relay/cmd/flags/flags.go           | 12 ++++++------
 relay/metadata_provider.go         |  6 ++++--
 relay/server.go                    |  8 ++++----
 10 files changed, 77 insertions(+), 44 deletions(-)

diff --git a/relay/blob_provider.go b/relay/blob_provider.go
index 8c4ce98bdd..fd79e8f542 100644
--- a/relay/blob_provider.go
+++ b/relay/blob_provider.go
@@ -31,7 +31,7 @@ func newBlobProvider(
 	ctx context.Context,
 	logger logging.Logger,
 	blobStore *blobstore.BlobStore,
-	blobCacheSize int,
+	blobCacheSize uint64,
 	maxIOConcurrency int,
 	fetchTimeout time.Duration) (*blobProvider, error) {
 
@@ -42,15 +42,27 @@
 		fetchTimeout: fetchTimeout,
 	}
 
-	c, err := cache.NewCachedAccessor[v2.BlobKey, []byte](blobCacheSize, maxIOConcurrency, server.fetchBlob)
+	c := cache.NewFIFOCache[v2.BlobKey, []byte](blobCacheSize)
+	err := c.WithWeightCalculator(computeBlobCacheWeight)
 	if err != nil {
 		return nil, fmt.Errorf("error creating blob cache: %w", err)
 	}
-	server.blobCache = c
+
+	cacheAccessor, err := cache.NewCacheAccessor[v2.BlobKey, []byte](c, maxIOConcurrency, server.fetchBlob)
+	if err != nil {
+		return nil, fmt.Errorf("error creating blob cache: %w", err)
+	}
+	server.blobCache = cacheAccessor
 
 	return server, nil
 }
 
+// computeBlobCacheWeight computes the 'weight' of the blob for the cache. The weight of a blob
+// is equal to its size, in bytes.
+func computeBlobCacheWeight(key v2.BlobKey, value []byte) uint64 {
+	return uint64(len(value))
+}
+
 // GetBlob retrieves a blob from the blob store.
func (s *blobProvider) GetBlob(ctx context.Context, blobKey v2.BlobKey) ([]byte, error) { data, err := s.blobCache.Get(ctx, blobKey) diff --git a/relay/blob_provider_test.go b/relay/blob_provider_test.go index 9309461c65..22368a5d5b 100644 --- a/relay/blob_provider_test.go +++ b/relay/blob_provider_test.go @@ -39,7 +39,7 @@ func TestReadWrite(t *testing.T) { context.Background(), logger, blobStore, - 10, + 1024*1024*32, 32, 10*time.Second) require.NoError(t, err) @@ -76,7 +76,7 @@ func TestNonExistentBlob(t *testing.T) { context.Background(), logger, blobStore, - 10, + 1024*1024*32, 32, 10*time.Second) require.NoError(t, err) diff --git a/relay/cache/cache_accessor.go b/relay/cache/cache_accessor.go index b8b9c8d3cd..8d96aa0753 100644 --- a/relay/cache/cache_accessor.go +++ b/relay/cache/cache_accessor.go @@ -2,7 +2,6 @@ package cache import ( "context" - lru "github.com/hashicorp/golang-lru/v2" "sync" ) @@ -44,8 +43,8 @@ type cacheAccessor[K comparable, V any] struct { // to be written into the channel. lookupsInProgress map[K]*accessResult[V] - // cache is the LRU cache used to store values fetched by the accessor. - cache *lru.Cache[K, V] + // cache is the underlying cache that this wrapper manages. + cache Cache[K, V] // concurrencyLimiter is a channel used to limit the number of concurrent lookups that can be in progress. concurrencyLimiter chan struct{} @@ -57,21 +56,16 @@ type cacheAccessor[K comparable, V any] struct { accessor Accessor[K, V] } -// NewCachedAccessor creates a new CacheAccessor. The cacheSize parameter specifies the maximum number of items +// NewCacheAccessor creates a new CacheAccessor. The cacheSize parameter specifies the maximum number of items // that can be stored in the cache. The concurrencyLimit parameter specifies the maximum number of concurrent // lookups that can be in progress at any given time. If a greater number of lookups are requested, the excess // lookups will block until a lookup completes. If concurrencyLimit is zero, then no limits are imposed. The accessor // parameter is the function used to fetch values that are not in the cache. -func NewCachedAccessor[K comparable, V any]( - cacheSize int, +func NewCacheAccessor[K comparable, V any]( + cache Cache[K, V], concurrencyLimit int, accessor Accessor[K, V]) (CacheAccessor[K, V], error) { - cache, err := lru.New[K, V](cacheSize) - if err != nil { - return nil, err - } - lookupsInProgress := make(map[K]*accessResult[V]) var concurrencyLimiter chan struct{} @@ -166,7 +160,7 @@ func (c *cacheAccessor[K, V]) fetchResult(ctx context.Context, key K, result *ac // Update the cache if the fetch was successful. if err == nil { - c.cache.Add(key, value) + c.cache.Put(key, value) } // Provide the result to all other goroutines that may be waiting for it. 
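Before the test changes below: the core idea of cacheAccessor, stripped of the cache layer and the concurrency limiter, is a singleflight-style deduplication of misses. A minimal self-contained sketch (illustrative names, not the patch's code):

package sketch

import "sync"

// result carries a fetched value to every goroutine waiting on the same key.
type result[V any] struct {
	wg    sync.WaitGroup
	value V
	err   error
}

// dedup collapses concurrent fetches of one key into a single call. It is a
// stripped-down illustration of cacheAccessor's lookupsInProgress map, with
// the cache itself and the concurrency limiter omitted.
type dedup[K comparable, V any] struct {
	mu       sync.Mutex
	inFlight map[K]*result[V]
	fetch    func(K) (V, error)
}

func newDedup[K comparable, V any](fetch func(K) (V, error)) *dedup[K, V] {
	return &dedup[K, V]{inFlight: make(map[K]*result[V]), fetch: fetch}
}

func (d *dedup[K, V]) get(key K) (V, error) {
	d.mu.Lock()
	if r, ok := d.inFlight[key]; ok {
		// Another goroutine is already fetching this key: wait for it.
		d.mu.Unlock()
		r.wg.Wait()
		return r.value, r.err
	}
	r := &result[V]{}
	r.wg.Add(1)
	d.inFlight[key] = r
	d.mu.Unlock()

	r.value, r.err = d.fetch(key)

	d.mu.Lock()
	delete(d.inFlight, key)
	d.mu.Unlock()
	r.wg.Done() // releases all waiters; WaitGroup orders the writes above
	return r.value, r.err
}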
diff --git a/relay/cache/cache_accessor_test.go b/relay/cache/cache_accessor_test.go index 01996222b2..10a86bda69 100644 --- a/relay/cache/cache_accessor_test.go +++ b/relay/cache/cache_accessor_test.go @@ -32,8 +32,9 @@ func TestRandomOperationsSingleThread(t *testing.T) { return &str, nil } cacheSize := rand.Intn(dataSize) + 1 + c := NewFIFOCache[int, *string](uint64(cacheSize)) - ca, err := NewCachedAccessor(cacheSize, 0, accessor) + ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) for i := 0; i < dataSize; i++ { @@ -80,7 +81,9 @@ func TestCacheMisses(t *testing.T) { return &str, nil } - ca, err := NewCachedAccessor(cacheSize, 0, accessor) + c := NewFIFOCache[int, *string](uint64(cacheSize)) + + ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) // Get the first cacheSize keys. This should fill the cache. @@ -143,7 +146,9 @@ func ParallelAccessTest(t *testing.T, sleepEnabled bool) { } cacheSize := rand.Intn(dataSize) + 1 - ca, err := NewCachedAccessor(cacheSize, 0, accessor) + c := NewFIFOCache[int, *string](uint64(cacheSize)) + + ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) // Lock the accessor. This will cause all cache misses to block. @@ -212,7 +217,9 @@ func TestParallelAccessWithError(t *testing.T) { } cacheSize := 100 - ca, err := NewCachedAccessor(cacheSize, 0, accessor) + c := NewFIFOCache[int, *string](uint64(cacheSize)) + + ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) // Lock the accessor. This will cause all cache misses to block. @@ -284,7 +291,9 @@ func TestConcurrencyLimiter(t *testing.T) { } cacheSize := 100 - ca, err := NewCachedAccessor(cacheSize, maxConcurrency, accessor) + c := NewFIFOCache[int, *string](uint64(cacheSize)) + + ca, err := NewCacheAccessor[int, *string](c, maxConcurrency, accessor) require.NoError(t, err) wg := sync.WaitGroup{} @@ -338,7 +347,9 @@ func TestOriginalRequesterTimesOut(t *testing.T) { } cacheSize := rand.Intn(dataSize) + 1 - ca, err := NewCachedAccessor(cacheSize, 0, accessor) + c := NewFIFOCache[int, *string](uint64(cacheSize)) + + ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) // Lock the accessor. This will cause all cache misses to block. @@ -426,7 +437,9 @@ func TestSecondaryRequesterTimesOut(t *testing.T) { } cacheSize := rand.Intn(dataSize) + 1 - ca, err := NewCachedAccessor(cacheSize, 0, accessor) + c := NewFIFOCache[int, *string](uint64(cacheSize)) + + ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) // Lock the accessor. This will cause all cache misses to block. 
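The "lock the accessor" device recurs throughout these tests. In isolation the pattern is simply a fetch function that must take a mutex the test is holding, so every cache miss can be frozen in flight and then released together (a sketch with assumed names):

package sketch

import (
	"fmt"
	"sync"
)

// newBlockingAccessor returns a fetch function plus a lock the test can hold
// to freeze all cache misses, then release to let them finish at once.
func newBlockingAccessor() (func(key int) (*string, error), *sync.Mutex) {
	lock := &sync.Mutex{}
	accessor := func(key int) (*string, error) {
		lock.Lock() // blocks while the test holds the lock
		defer lock.Unlock()
		s := fmt.Sprintf("value-%d", key)
		return &s, nil
	}
	return accessor, lock
}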
diff --git a/relay/chunk_provider.go b/relay/chunk_provider.go index 61a9d973d4..0d5f38260c 100644 --- a/relay/chunk_provider.go +++ b/relay/chunk_provider.go @@ -47,7 +47,7 @@ func newChunkProvider( ctx context.Context, logger logging.Logger, chunkReader chunkstore.ChunkReader, - cacheSize int, + cacheSize uint64, maxIOConcurrency int, proofFetchTimeout time.Duration, coefficientFetchTimeout time.Duration) (*chunkProvider, error) { @@ -60,14 +60,20 @@ func newChunkProvider( coefficientFetchTimeout: coefficientFetchTimeout, } - c, err := cache.NewCachedAccessor[blobKeyWithMetadata, []*encoding.Frame]( - cacheSize, + c := cache.NewFIFOCache[blobKeyWithMetadata, []*encoding.Frame](cacheSize) + err := c.WithWeightCalculator(computeFramesCacheWeight) + if err != nil { + return nil, fmt.Errorf("error setting weight calculator: %w", err) + } + + cacheAccessor, err := cache.NewCacheAccessor[blobKeyWithMetadata, []*encoding.Frame]( + c, maxIOConcurrency, server.fetchFrames) if err != nil { return nil, err } - server.frameCache = c + server.frameCache = cacheAccessor return server, nil } @@ -75,6 +81,12 @@ func newChunkProvider( // frameMap is a map of blob keys to frames. type frameMap map[v2.BlobKey][]*encoding.Frame +// computeFramesCacheWeight computes the 'weight' of the frames for the cache. The weight of a list of frames +// is equal to the size required to store the data, in bytes. +func computeFramesCacheWeight(key blobKeyWithMetadata, frames []*encoding.Frame) uint64 { + return uint64(len(frames)) * uint64(key.metadata.chunkSizeBytes) +} + // GetFrames retrieves the frames for a blob. func (s *chunkProvider) GetFrames(ctx context.Context, mMap metadataMap) (frameMap, error) { diff --git a/relay/chunk_provider_test.go b/relay/chunk_provider_test.go index 8615ad7d23..06ec215b80 100644 --- a/relay/chunk_provider_test.go +++ b/relay/chunk_provider_test.go @@ -49,7 +49,7 @@ func TestFetchingIndividualBlobs(t *testing.T) { context.Background(), logger, chunkReader, - 10, + 1024*1024*32, 32, 10*time.Second, 10*time.Second) @@ -136,7 +136,7 @@ func TestFetchingBatchedBlobs(t *testing.T) { context.Background(), logger, chunkReader, - 10, + 1024*1024*32, 32, 10*time.Second, 10*time.Second) diff --git a/relay/cmd/config.go b/relay/cmd/config.go index 118a5d208a..b1a7c8b049 100644 --- a/relay/cmd/config.go +++ b/relay/cmd/config.go @@ -59,9 +59,9 @@ func NewConfig(ctx *cli.Context) (Config, error) { MaxGRPCMessageSize: ctx.Int(flags.MaxGRPCMessageSizeFlag.Name), MetadataCacheSize: ctx.Int(flags.MetadataCacheSizeFlag.Name), MetadataMaxConcurrency: ctx.Int(flags.MetadataMaxConcurrencyFlag.Name), - BlobCacheSize: ctx.Int(flags.BlobCacheSizeFlag.Name), + BlobCacheSize: ctx.Uint64(flags.BlobCacheSizeFlag.Name), BlobMaxConcurrency: ctx.Int(flags.BlobMaxConcurrencyFlag.Name), - ChunkCacheSize: ctx.Int(flags.ChunkCacheSizeFlag.Name), + ChunkCacheSize: ctx.Uint64(flags.ChunkCacheSizeFlag.Name), ChunkMaxConcurrency: ctx.Int(flags.ChunkMaxConcurrencyFlag.Name), RateLimits: limiter.Config{ MaxGetBlobOpsPerSecond: ctx.Float64(flags.MaxGetBlobOpsPerSecondFlag.Name), diff --git a/relay/cmd/flags/flags.go b/relay/cmd/flags/flags.go index ac6bd57258..3df62c14a9 100644 --- a/relay/cmd/flags/flags.go +++ b/relay/cmd/flags/flags.go @@ -59,12 +59,12 @@ var ( EnvVar: common.PrefixEnvVar(envVarPrefix, "METADATA_MAX_CONCURRENCY"), Value: 32, } - BlobCacheSizeFlag = cli.IntFlag{ + BlobCacheSizeFlag = cli.Uint64Flag{ Name: common.PrefixFlag(FlagPrefix, "blob-cache-size"), - Usage: "Max number of items in the blob cache", + 
Usage: "The size of the blob cache, in bytes.",
 		Required: false,
 		EnvVar:   common.PrefixEnvVar(envVarPrefix, "BLOB_CACHE_SIZE"),
-		Value:    32,
+		Value:    1024 * 1024 * 1024,
 	}
 	BlobMaxConcurrencyFlag = cli.IntFlag{
 		Name:     common.PrefixFlag(FlagPrefix, "blob-max-concurrency"),
@@ -73,12 +73,12 @@ var (
 		EnvVar:   common.PrefixEnvVar(envVarPrefix, "BLOB_MAX_CONCURRENCY"),
 		Value:    32,
 	}
-	ChunkCacheSizeFlag = cli.IntFlag{
+	ChunkCacheSizeFlag = cli.Uint64Flag{
 		Name:     common.PrefixFlag(FlagPrefix, "chunk-cache-size"),
-		Usage:    "Max number of items in the chunk cache",
+		Usage:    "Size of the chunk cache, in bytes.",
 		Required: false,
 		EnvVar:   common.PrefixEnvVar(envVarPrefix, "CHUNK_CACHE_SIZE"),
-		Value:    32,
+		Value:    4 * 1024 * 1024 * 1024,
 	}
 	ChunkMaxConcurrencyFlag = cli.IntFlag{
 		Name:     common.PrefixFlag(FlagPrefix, "chunk-max-concurrency"),
diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go
index 9f7eced495..58d5bb0c7c 100644
--- a/relay/metadata_provider.go
+++ b/relay/metadata_provider.go
@@ -69,8 +69,10 @@ func newMetadataProvider(
 		fetchTimeout: fetchTimeout,
 	}
 
-	metadataCache, err := cache.NewCachedAccessor[v2.BlobKey, *blobMetadata](
-		metadataCacheSize,
+	c := cache.NewFIFOCache[v2.BlobKey, *blobMetadata](uint64(metadataCacheSize))
+
+	metadataCache, err := cache.NewCacheAccessor[v2.BlobKey, *blobMetadata](
+		c,
 		maxIOConcurrency,
 		server.fetchMetadata)
 	if err != nil {
diff --git a/relay/server.go b/relay/server.go
index 7a5445148d..4803185b19 100644
--- a/relay/server.go
+++ b/relay/server.go
@@ -75,14 +75,14 @@ type Config struct {
 	// goroutines.
 	MetadataMaxConcurrency int
 
-	// BlobCacheSize is the maximum number of items in the blob cache.
-	BlobCacheSize int
+	// BlobCacheSize is the maximum size of the blob cache, in bytes.
+	BlobCacheSize uint64
 
 	// BlobMaxConcurrency puts a limit on the maximum number of concurrent blob fetches actively running on goroutines.
 	BlobMaxConcurrency int
 
-	// ChunkCacheSize is the maximum number of items in the chunk cache.
-	ChunkCacheSize int
+	// ChunkCacheSize is the maximum size of the chunk cache, in bytes.
+	ChunkCacheSize uint64
 
 	// ChunkMaxConcurrency is the size of the work pool for fetching chunks. Note that this does not
 	// impact concurrency utilized by the s3 client to upload/download fragmented files.

From 9100eac5a94c4cee5dc1301dee34334cd8566749 Mon Sep 17 00:00:00 2001
From: Cody Littley
Date: Mon, 25 Nov 2024 09:09:42 -0600
Subject: [PATCH 41/45] Use queue for authenticator because it's a lot cleaner.

Signed-off-by: Cody Littley
---
 relay/auth/authenticator.go | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go
index 0eb78de18c..077108a6c3 100644
--- a/relay/auth/authenticator.go
+++ b/relay/auth/authenticator.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	pb "github.com/Layr-Labs/eigenda/api/grpc/relay"
+	"github.com/Layr-Labs/eigenda/common/queue"
 	"github.com/Layr-Labs/eigenda/core"
 	lru "github.com/hashicorp/golang-lru/v2"
 	"sync"
@@ -38,7 +39,7 @@ type requestAuthenticator struct {
 	authenticatedClients map[string]struct{}
 
 	// authenticationTimeouts is a list of authentications that have been performed, along with their expiration times.
-	authenticationTimeouts []*authenticationTimeout // TODO use a queue here why not
+	authenticationTimeouts queue.Queue[*authenticationTimeout]
 
 	// authenticationTimeoutDuration is the duration for which an auth is valid.
// If this is zero, then auth saving is disabled, and each request will be authenticated independently. @@ -67,7 +68,7 @@ func NewRequestAuthenticator( authenticator := &requestAuthenticator{ ics: ics, authenticatedClients: make(map[string]struct{}), - authenticationTimeouts: make([]*authenticationTimeout, 0), + authenticationTimeouts: &queue.LinkedQueue[*authenticationTimeout]{}, authenticationTimeoutDuration: authenticationTimeoutDuration, keyCache: keyCache, } @@ -170,7 +171,7 @@ func (a *requestAuthenticator) saveAuthenticationResult(now time.Time, origin st defer a.savedAuthLock.Unlock() a.authenticatedClients[origin] = struct{}{} - a.authenticationTimeouts = append(a.authenticationTimeouts, + a.authenticationTimeouts.Push( &authenticationTimeout{ origin: origin, expiration: now.Add(a.authenticationTimeoutDuration), @@ -195,14 +196,12 @@ func (a *requestAuthenticator) isAuthenticationStillValid(now time.Time, address // removeOldAuthentications removes any authentications that have expired. // This method is not thread safe and should be called with the savedAuthLock held. func (a *requestAuthenticator) removeOldAuthentications(now time.Time) { - index := 0 - for ; index < len(a.authenticationTimeouts); index++ { - if a.authenticationTimeouts[index].expiration.After(now) { + for a.authenticationTimeouts.Size() > 0 { + next, _ := a.authenticationTimeouts.Peek() + if next.expiration.After(now) { break } - delete(a.authenticatedClients, a.authenticationTimeouts[index].origin) - } - if index > 0 { - a.authenticationTimeouts = a.authenticationTimeouts[index:] + delete(a.authenticatedClients, next.origin) + a.authenticationTimeouts.Pop() } } From 71a5aaa2c6c5422187c5378773f735745cf3ab62 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Mon, 25 Nov 2024 09:20:58 -0600 Subject: [PATCH 42/45] Cleanup. Signed-off-by: Cody Littley --- common/queue/linked_queue.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/common/queue/linked_queue.go b/common/queue/linked_queue.go index 2142759e77..949e8b4843 100644 --- a/common/queue/linked_queue.go +++ b/common/queue/linked_queue.go @@ -16,13 +16,13 @@ type node[T any] struct { } func (l *LinkedQueue[T]) Push(value T) { + newNode := &node[T]{value: value} if l.size == 0 { - l.front = &node[T]{value: value} + l.front = newNode l.back = l.front } else { - n := &node[T]{value: value} - l.back.next = n - l.back = n + l.back.next = newNode + l.back = newNode } l.size++ } @@ -36,6 +36,10 @@ func (l *LinkedQueue[T]) Pop() (T, bool) { value := l.front.value l.front = l.front.next l.size-- + + if l.size == 0 { + l.back = nil + } return value, true } From f8327dfdc9b48f8ae66b54b4a7dfcb34b191caa2 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Tue, 26 Nov 2024 08:21:19 -0600 Subject: [PATCH 43/45] Use gods library for data structures. 
Signed-off-by: Cody Littley --- common/queue/linked_queue.go | 56 ----------------------------- common/queue/linked_queue_test.go | 58 ------------------------------- common/queue/queue.go | 18 ---------- go.mod | 1 + go.sum | 2 ++ relay/auth/authenticator.go | 14 ++++---- relay/cache/fifo-cache.go | 12 ++++--- 7 files changed, 18 insertions(+), 143 deletions(-) delete mode 100644 common/queue/linked_queue.go delete mode 100644 common/queue/linked_queue_test.go delete mode 100644 common/queue/queue.go diff --git a/common/queue/linked_queue.go b/common/queue/linked_queue.go deleted file mode 100644 index 949e8b4843..0000000000 --- a/common/queue/linked_queue.go +++ /dev/null @@ -1,56 +0,0 @@ -package queue - -var _ Queue[string] = &LinkedQueue[string]{} - -// LinkedQueue is a queue that uses a linked list to store values. It is not thread safe. -type LinkedQueue[T any] struct { - front *node[T] - back *node[T] - size int -} - -// node is a single element in the linked list. -type node[T any] struct { - value T - next *node[T] -} - -func (l *LinkedQueue[T]) Push(value T) { - newNode := &node[T]{value: value} - if l.size == 0 { - l.front = newNode - l.back = l.front - } else { - l.back.next = newNode - l.back = newNode - } - l.size++ -} - -func (l *LinkedQueue[T]) Pop() (T, bool) { - if l.size == 0 { - var zero T - return zero, false - } - - value := l.front.value - l.front = l.front.next - l.size-- - - if l.size == 0 { - l.back = nil - } - return value, true -} - -func (l *LinkedQueue[T]) Peek() (T, bool) { - if l.size == 0 { - var zero T - return zero, false - } - return l.front.value, true -} - -func (l *LinkedQueue[T]) Size() int { - return l.size -} diff --git a/common/queue/linked_queue_test.go b/common/queue/linked_queue_test.go deleted file mode 100644 index 7e790ef8f6..0000000000 --- a/common/queue/linked_queue_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package queue - -import ( - tu "github.com/Layr-Labs/eigenda/common/testutils" - "github.com/stretchr/testify/require" - "golang.org/x/exp/rand" - "testing" -) - -func TestEmptyQueue(t *testing.T) { - var q LinkedQueue[int] - require.Equal(t, 0, q.Size()) - - next, ok := q.Peek() - require.False(t, ok) - require.Equal(t, 0, next) - - next, ok = q.Pop() - require.False(t, ok) - require.Equal(t, 0, next) - - require.Equal(t, 0, q.Size()) -} - -func TestRandomOperations(t *testing.T) { - tu.InitializeRandom() - - var q LinkedQueue[int] - expectedValues := make([]int, 0) - - for i := 0; i < 1000; i++ { - if rand.Int()%2 == 0 || len(expectedValues) == 0 { - // push an item - itemToPush := rand.Int() - q.Push(itemToPush) - expectedValues = append(expectedValues, itemToPush) - } else { - // pop an item - - next, ok := q.Pop() - expectedNext := expectedValues[0] - expectedValues = expectedValues[1:] - - require.True(t, ok) - require.Equal(t, expectedNext, next) - } - - require.Equal(t, len(expectedValues), q.Size()) - - next, ok := q.Peek() - if len(expectedValues) == 0 { - require.False(t, ok) - } else { - require.True(t, ok) - require.Equal(t, expectedValues[0], next) - } - } -} diff --git a/common/queue/queue.go b/common/queue/queue.go deleted file mode 100644 index 8b605bd7f3..0000000000 --- a/common/queue/queue.go +++ /dev/null @@ -1,18 +0,0 @@ -package queue - -// Queue is an interface for a generic queue. It's absurd there isn't an equivalent in the standard golang libraries. -type Queue[T any] interface { - // Push adds a value to the queue. 
- Push(value T) - - // Pop removes and returns the value at the front of the queue. - // If the queue is empty, the second return value will be false. - Pop() (T, bool) - - // Peek returns the value at the front of the queue without removing it. - // If the queue is empty, the second return value will be false. - Peek() (T, bool) - - // Size returns the number of values in the queue. - Size() int -} diff --git a/go.mod b/go.mod index 45e0bbe49a..dc262e397b 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.13.12 github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.28.6 github.com/consensys/gnark-crypto v0.12.1 + github.com/emirpasic/gods v1.18.1 github.com/ethereum/go-ethereum v1.14.8 github.com/fxamacker/cbor/v2 v2.5.0 github.com/gin-contrib/logger v0.2.6 diff --git a/go.sum b/go.sum index 4762b276fb..d3b4dde0bf 100644 --- a/go.sum +++ b/go.sum @@ -165,6 +165,8 @@ github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6 github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= +github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/ethereum/c-kzg-4844 v1.0.0 h1:0X1LBXxaEtYD9xsyj9B9ctQEZIpnvVDeoBx8aHEwTNA= github.com/ethereum/c-kzg-4844 v1.0.0/go.mod h1:VewdlzQmpT5QSrVhbBuGoCdFJkpaJlO1aQputP83wc0= github.com/ethereum/go-ethereum v1.14.8 h1:NgOWvXS+lauK+zFukEvi85UmmsS/OkV0N23UZ1VTIig= diff --git a/relay/auth/authenticator.go b/relay/auth/authenticator.go index 077108a6c3..c45ee4da47 100644 --- a/relay/auth/authenticator.go +++ b/relay/auth/authenticator.go @@ -5,8 +5,9 @@ import ( "errors" "fmt" pb "github.com/Layr-Labs/eigenda/api/grpc/relay" - "github.com/Layr-Labs/eigenda/common/queue" "github.com/Layr-Labs/eigenda/core" + "github.com/emirpasic/gods/queues" + "github.com/emirpasic/gods/queues/linkedlistqueue" lru "github.com/hashicorp/golang-lru/v2" "sync" "time" @@ -39,7 +40,7 @@ type requestAuthenticator struct { authenticatedClients map[string]struct{} // authenticationTimeouts is a list of authentications that have been performed, along with their expiration times. - authenticationTimeouts queue.Queue[*authenticationTimeout] + authenticationTimeouts queues.Queue // authenticationTimeoutDuration is the duration for which an auth is valid. // If this is zero, then auth saving is disabled, and each request will be authenticated independently. 
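A note on the gods API adopted here (v1.18.1): queues.Queue predates Go generics, so values round-trip through interface{} and must be type-asserted on the way out, which is why the Peek and Dequeue call sites below assert back to *authenticationTimeout. In miniature (import path follows the go.mod entry above):

package main

import (
	"fmt"

	"github.com/emirpasic/gods/queues/linkedlistqueue"
)

func main() {
	q := linkedlistqueue.New()
	q.Enqueue("expires-at-10:00") // stored as interface{}
	if v, ok := q.Peek(); ok {
		s := v.(string) // caller must assert the concrete type back
		fmt.Println(s, q.Size())
	}
}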
@@ -68,7 +69,7 @@ func NewRequestAuthenticator( authenticator := &requestAuthenticator{ ics: ics, authenticatedClients: make(map[string]struct{}), - authenticationTimeouts: &queue.LinkedQueue[*authenticationTimeout]{}, + authenticationTimeouts: linkedlistqueue.New(), authenticationTimeoutDuration: authenticationTimeoutDuration, keyCache: keyCache, } @@ -171,7 +172,7 @@ func (a *requestAuthenticator) saveAuthenticationResult(now time.Time, origin st defer a.savedAuthLock.Unlock() a.authenticatedClients[origin] = struct{}{} - a.authenticationTimeouts.Push( + a.authenticationTimeouts.Enqueue( &authenticationTimeout{ origin: origin, expiration: now.Add(a.authenticationTimeoutDuration), @@ -197,11 +198,12 @@ func (a *requestAuthenticator) isAuthenticationStillValid(now time.Time, address // This method is not thread safe and should be called with the savedAuthLock held. func (a *requestAuthenticator) removeOldAuthentications(now time.Time) { for a.authenticationTimeouts.Size() > 0 { - next, _ := a.authenticationTimeouts.Peek() + val, _ := a.authenticationTimeouts.Peek() + next := val.(*authenticationTimeout) if next.expiration.After(now) { break } delete(a.authenticatedClients, next.origin) - a.authenticationTimeouts.Pop() + a.authenticationTimeouts.Dequeue() } } diff --git a/relay/cache/fifo-cache.go b/relay/cache/fifo-cache.go index 1b994ae25e..7aa3a7d0a2 100644 --- a/relay/cache/fifo-cache.go +++ b/relay/cache/fifo-cache.go @@ -2,7 +2,8 @@ package cache import ( "errors" - "github.com/Layr-Labs/eigenda/common/queue" + "github.com/emirpasic/gods/queues" + "github.com/emirpasic/gods/queues/linkedlistqueue" ) var _ Cache[string, string] = &FIFOCache[string, string]{} @@ -15,7 +16,7 @@ type FIFOCache[K comparable, V any] struct { currentWeight uint64 maxWeight uint64 data map[K]V - expirationQueue queue.Queue[K] + expirationQueue queues.Queue } // NewFIFOCache creates a new FIFOCache. @@ -28,7 +29,7 @@ func NewFIFOCache[K comparable, V any](maxWeight uint64) *FIFOCache[K, V] { maxWeight: maxWeight, data: make(map[K]V), weightCalculator: defaultWeightCalculator, - expirationQueue: &queue.LinkedQueue[K]{}, + expirationQueue: linkedlistqueue.New(), } } @@ -51,7 +52,7 @@ func (f *FIFOCache[K, V]) Put(key K, value V) { oldWeight := f.weightCalculator(key, old) f.currentWeight -= oldWeight } else { - f.expirationQueue.Push(key) + f.expirationQueue.Enqueue(key) } if f.currentWeight < f.maxWeight { @@ -60,7 +61,8 @@ func (f *FIFOCache[K, V]) Put(key K, value V) { } for f.currentWeight > f.maxWeight { - keyToEvict, _ := f.expirationQueue.Pop() + val, _ := f.expirationQueue.Dequeue() + keyToEvict := val.(K) weightToEvict := f.weightCalculator(keyToEvict, f.data[keyToEvict]) delete(f.data, keyToEvict) f.currentWeight -= weightToEvict From e9255173c2bd82504ca902aafcf596a571aa7c35 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 27 Nov 2024 09:29:58 -0600 Subject: [PATCH 44/45] Made suggested changes. 
Signed-off-by: Cody Littley --- relay/blob_provider.go | 6 +----- relay/cache/cache.go | 5 ----- relay/cache/cache_accessor_test.go | 28 +++++++++++++++++++++------- relay/cache/fifo-cache.go | 17 ++--------------- relay/cache/fifo_cache_test.go | 12 ++++-------- relay/chunk_provider.go | 6 +----- relay/cmd/config.go | 2 +- relay/cmd/flags/flags.go | 6 +++--- relay/metadata_provider.go | 5 ++++- relay/server.go | 6 +++--- relay/server_test.go | 4 ++-- 11 files changed, 42 insertions(+), 55 deletions(-) diff --git a/relay/blob_provider.go b/relay/blob_provider.go index bc4856aa7f..70cc310665 100644 --- a/relay/blob_provider.go +++ b/relay/blob_provider.go @@ -42,11 +42,7 @@ func newBlobProvider( fetchTimeout: fetchTimeout, } - c := cache.NewFIFOCache[v2.BlobKey, []byte](blobCacheSize) - err := c.WithWeightCalculator(computeBlobCacheWeight) - if err != nil { - return nil, fmt.Errorf("error creating blob cache: %w", err) - } + c := cache.NewFIFOCache[v2.BlobKey, []byte](blobCacheSize, computeBlobCacheWeight) cacheAccessor, err := cache.NewCacheAccessor[v2.BlobKey, []byte](c, maxIOConcurrency, server.fetchBlob) if err != nil { diff --git a/relay/cache/cache.go b/relay/cache/cache.go index 034a5fdd20..c76140da4e 100644 --- a/relay/cache/cache.go +++ b/relay/cache/cache.go @@ -17,11 +17,6 @@ type Cache[K comparable, V any] interface { // of the cache in and of itself. Put(key K, value V) - // WithWeightCalculator sets the weight calculator for the cache. May only be called - // when the cache is empty. The weight calculator should be an idempotent function that - // always returns the same output given the same input. - WithWeightCalculator(weightCalculator WeightCalculator[K, V]) error - // Size returns the number of key-value pairs in the cache. Size() int diff --git a/relay/cache/cache_accessor_test.go b/relay/cache/cache_accessor_test.go index 10a86bda69..0f2ac501d4 100644 --- a/relay/cache/cache_accessor_test.go +++ b/relay/cache/cache_accessor_test.go @@ -32,7 +32,9 @@ func TestRandomOperationsSingleThread(t *testing.T) { return &str, nil } cacheSize := rand.Intn(dataSize) + 1 - c := NewFIFOCache[int, *string](uint64(cacheSize)) + c := NewFIFOCache[int, *string](uint64(cacheSize), func(key int, value *string) uint64 { + return 1 + }) ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) @@ -81,7 +83,9 @@ func TestCacheMisses(t *testing.T) { return &str, nil } - c := NewFIFOCache[int, *string](uint64(cacheSize)) + c := NewFIFOCache[int, *string](uint64(cacheSize), func(key int, value *string) uint64 { + return 1 + }) ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) @@ -146,7 +150,9 @@ func ParallelAccessTest(t *testing.T, sleepEnabled bool) { } cacheSize := rand.Intn(dataSize) + 1 - c := NewFIFOCache[int, *string](uint64(cacheSize)) + c := NewFIFOCache[int, *string](uint64(cacheSize), func(key int, value *string) uint64 { + return 1 + }) ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) @@ -217,7 +223,9 @@ func TestParallelAccessWithError(t *testing.T) { } cacheSize := 100 - c := NewFIFOCache[int, *string](uint64(cacheSize)) + c := NewFIFOCache[int, *string](uint64(cacheSize), func(key int, value *string) uint64 { + return 1 + }) ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) @@ -291,7 +299,9 @@ func TestConcurrencyLimiter(t *testing.T) { } cacheSize := 100 - c := NewFIFOCache[int, *string](uint64(cacheSize)) + c := NewFIFOCache[int, *string](uint64(cacheSize), 
func(key int, value *string) uint64 { + return 1 + }) ca, err := NewCacheAccessor[int, *string](c, maxConcurrency, accessor) require.NoError(t, err) @@ -347,7 +357,9 @@ func TestOriginalRequesterTimesOut(t *testing.T) { } cacheSize := rand.Intn(dataSize) + 1 - c := NewFIFOCache[int, *string](uint64(cacheSize)) + c := NewFIFOCache[int, *string](uint64(cacheSize), func(key int, value *string) uint64 { + return 1 + }) ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) @@ -437,7 +449,9 @@ func TestSecondaryRequesterTimesOut(t *testing.T) { } cacheSize := rand.Intn(dataSize) + 1 - c := NewFIFOCache[int, *string](uint64(cacheSize)) + c := NewFIFOCache[int, *string](uint64(cacheSize), func(key int, value *string) uint64 { + return 1 + }) ca, err := NewCacheAccessor[int, *string](c, 0, accessor) require.NoError(t, err) diff --git a/relay/cache/fifo-cache.go b/relay/cache/fifo-cache.go index 7aa3a7d0a2..1c2e7c6abd 100644 --- a/relay/cache/fifo-cache.go +++ b/relay/cache/fifo-cache.go @@ -1,7 +1,6 @@ package cache import ( - "errors" "github.com/emirpasic/gods/queues" "github.com/emirpasic/gods/queues/linkedlistqueue" ) @@ -20,15 +19,11 @@ type FIFOCache[K comparable, V any] struct { } // NewFIFOCache creates a new FIFOCache. -func NewFIFOCache[K comparable, V any](maxWeight uint64) *FIFOCache[K, V] { - defaultWeightCalculator := func(key K, value V) uint64 { - return uint64(1) - } - +func NewFIFOCache[K comparable, V any](maxWeight uint64, calculator WeightCalculator[K, V]) *FIFOCache[K, V] { return &FIFOCache[K, V]{ maxWeight: maxWeight, data: make(map[K]V), - weightCalculator: defaultWeightCalculator, + weightCalculator: calculator, expirationQueue: linkedlistqueue.New(), } } @@ -69,14 +64,6 @@ func (f *FIFOCache[K, V]) Put(key K, value V) { } } -func (f *FIFOCache[K, V]) WithWeightCalculator(weightCalculator WeightCalculator[K, V]) error { - if f.Size() > 0 { - return errors.New("cannot set weight calculator on non-empty cache") - } - f.weightCalculator = weightCalculator - return nil -} - func (f *FIFOCache[K, V]) Size() int { return len(f.data) } diff --git a/relay/cache/fifo_cache_test.go b/relay/cache/fifo_cache_test.go index 3cafc2b198..da4de5ad1f 100644 --- a/relay/cache/fifo_cache_test.go +++ b/relay/cache/fifo_cache_test.go @@ -11,7 +11,9 @@ func TestExpirationOrder(t *testing.T) { tu.InitializeRandom() maxWeight := uint64(10 + rand.Intn(10)) - c := NewFIFOCache[int, int](maxWeight) + c := NewFIFOCache[int, int](maxWeight, func(key int, value int) uint64 { + return 1 + }) require.Equal(t, uint64(0), c.Weight()) require.Equal(t, 0, c.Size()) @@ -83,9 +85,7 @@ func TestWeightedValues(t *testing.T) { return uint64(key) } - c := NewFIFOCache[int, int](maxWeight) - err := c.WithWeightCalculator(weightCalculator) - require.NoError(t, err) + c := NewFIFOCache[int, int](maxWeight, weightCalculator) expectedValues := make(map[int]int) @@ -135,8 +135,4 @@ func TestWeightedValues(t *testing.T) { require.True(t, ok) require.Equal(t, v, value) } - - // Sanity check, attempting to update the weight calculator function at this point should fail. 
- err = c.WithWeightCalculator(weightCalculator) - require.Error(t, err) } diff --git a/relay/chunk_provider.go b/relay/chunk_provider.go index 0d5f38260c..5bc2926732 100644 --- a/relay/chunk_provider.go +++ b/relay/chunk_provider.go @@ -60,11 +60,7 @@ func newChunkProvider( coefficientFetchTimeout: coefficientFetchTimeout, } - c := cache.NewFIFOCache[blobKeyWithMetadata, []*encoding.Frame](cacheSize) - err := c.WithWeightCalculator(computeFramesCacheWeight) - if err != nil { - return nil, fmt.Errorf("error setting weight calculator: %w", err) - } + c := cache.NewFIFOCache[blobKeyWithMetadata, []*encoding.Frame](cacheSize, computeFramesCacheWeight) cacheAccessor, err := cache.NewCacheAccessor[blobKeyWithMetadata, []*encoding.Frame]( c, diff --git a/relay/cmd/config.go b/relay/cmd/config.go index c62b66d5a6..ff1513d172 100644 --- a/relay/cmd/config.go +++ b/relay/cmd/config.go @@ -60,7 +60,7 @@ func NewConfig(ctx *cli.Context) (Config, error) { MaxGRPCMessageSize: ctx.Int(flags.MaxGRPCMessageSizeFlag.Name), MetadataCacheSize: ctx.Int(flags.MetadataCacheSizeFlag.Name), MetadataMaxConcurrency: ctx.Int(flags.MetadataMaxConcurrencyFlag.Name), - BlobCacheSize: ctx.Uint64(flags.BlobCacheSizeFlag.Name), + BlobCacheBytes: ctx.Uint64(flags.BlobCacheBytes.Name), BlobMaxConcurrency: ctx.Int(flags.BlobMaxConcurrencyFlag.Name), ChunkCacheSize: ctx.Uint64(flags.ChunkCacheSizeFlag.Name), ChunkMaxConcurrency: ctx.Int(flags.ChunkMaxConcurrencyFlag.Name), diff --git a/relay/cmd/flags/flags.go b/relay/cmd/flags/flags.go index 667327fd0d..0bb24ce2ae 100644 --- a/relay/cmd/flags/flags.go +++ b/relay/cmd/flags/flags.go @@ -60,8 +60,8 @@ var ( EnvVar: common.PrefixEnvVar(envVarPrefix, "METADATA_MAX_CONCURRENCY"), Value: 32, } - BlobCacheSizeFlag = cli.Uint64Flag{ - Name: common.PrefixFlag(FlagPrefix, "blob-cache-size"), + BlobCacheBytes = cli.Uint64Flag{ + Name: common.PrefixFlag(FlagPrefix, "blob-cache-bytes"), Usage: "The size of the blob cache, in bytes.", Required: false, EnvVar: common.PrefixEnvVar(envVarPrefix, "BLOB_CACHE_SIZE"), @@ -297,7 +297,7 @@ var optionalFlags = []cli.Flag{ MaxGRPCMessageSizeFlag, MetadataCacheSizeFlag, MetadataMaxConcurrencyFlag, - BlobCacheSizeFlag, + BlobCacheBytes, BlobMaxConcurrencyFlag, ChunkCacheSizeFlag, ChunkMaxConcurrencyFlag, diff --git a/relay/metadata_provider.go b/relay/metadata_provider.go index ebc08bba96..e1f188bb9e 100644 --- a/relay/metadata_provider.go +++ b/relay/metadata_provider.go @@ -74,7 +74,10 @@ func newMetadataProvider( } server.blobParamsMap.Store(blobParamsMap) - c := cache.NewFIFOCache[v2.BlobKey, *blobMetadata](uint64(metadataCacheSize)) + c := cache.NewFIFOCache[v2.BlobKey, *blobMetadata](uint64(metadataCacheSize), + func(key v2.BlobKey, value *blobMetadata) uint64 { + return uint64(1) + }) metadataCache, err := cache.NewCacheAccessor[v2.BlobKey, *blobMetadata]( c, diff --git a/relay/server.go b/relay/server.go index 8cb4b35a5a..eb00709e9f 100644 --- a/relay/server.go +++ b/relay/server.go @@ -78,8 +78,8 @@ type Config struct { // goroutines. MetadataMaxConcurrency int - // BlobCacheSize is the maximum size of the blob cache, in bytes. - BlobCacheSize uint64 + // BlobCacheBytes is the maximum size of the blob cache, in bytes. + BlobCacheBytes uint64 // BlobMaxConcurrency puts a limit on the maximum number of concurrent blob fetches actively running on goroutines. 
BlobMaxConcurrency int @@ -153,7 +153,7 @@ func NewServer( ctx, logger, blobStore, - config.BlobCacheSize, + config.BlobCacheBytes, config.BlobMaxConcurrency, config.Timeouts.InternalGetBlobTimeout) if err != nil { diff --git a/relay/server_test.go b/relay/server_test.go index 3e16c624c3..58b8893714 100644 --- a/relay/server_test.go +++ b/relay/server_test.go @@ -25,9 +25,9 @@ func defaultConfig() *Config { MaxGRPCMessageSize: 1024 * 1024 * 300, MetadataCacheSize: 1024 * 1024, MetadataMaxConcurrency: 32, - BlobCacheSize: 32, + BlobCacheBytes: 1024 * 1024, BlobMaxConcurrency: 32, - ChunkCacheSize: 32, + ChunkCacheSize: 1024 * 1024, ChunkMaxConcurrency: 32, MaxKeysPerGetChunksRequest: 1024, RateLimits: limiter.Config{ From a9d332923e6d4983b3e3722e6b93ac47ae406715 Mon Sep 17 00:00:00 2001 From: Cody Littley Date: Wed, 27 Nov 2024 09:31:15 -0600 Subject: [PATCH 45/45] Cleanup Signed-off-by: Cody Littley --- relay/cache/cache.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/relay/cache/cache.go b/relay/cache/cache.go index c76140da4e..1d3c7f1a04 100644 --- a/relay/cache/cache.go +++ b/relay/cache/cache.go @@ -3,11 +3,11 @@ package cache // WeightCalculator is a function that calculates the weight of a key-value pair in a Cache. // By default, the weight of a key-value pair is 1. Cache capacity is always specified in terms of // the weight of the key-value pairs it can hold, rather than the number of key-value pairs. -// -// Unless otherwise noted, Cache implementations are not required to be thread safe. type WeightCalculator[K comparable, V any] func(key K, value V) uint64 // Cache is an interface for a generic cache. +// +// Unless otherwise noted, Cache implementations are not required to be thread safe. type Cache[K comparable, V any] interface { // Get returns the value associated with the key, and a boolean indicating whether the key was found in the cache. Get(key K) (V, bool)
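To summarize where the series lands: after patch 45, a relay component wires the pieces together roughly as follows. This is a condensed sketch using the signatures introduced above; the key type, fetch function, budget, and concurrency limit are placeholders, not values from the patches.

package main

import (
	"context"
	"fmt"

	"github.com/Layr-Labs/eigenda/relay/cache"
)

func main() {
	// A byte-budgeted FIFO cache: weight is payload size, capacity 32 MiB.
	c := cache.NewFIFOCache[string, []byte](32*1024*1024, func(key string, value []byte) uint64 {
		return uint64(len(value))
	})

	// Stand-in for a real fetch (e.g. an S3 read in the blob provider).
	fetch := func(key string) ([]byte, error) {
		return []byte("payload for " + key), nil
	}

	// Up to 8 concurrent misses; duplicate in-flight lookups are coalesced.
	accessor, err := cache.NewCacheAccessor[string, []byte](c, 8, fetch)
	if err != nil {
		panic(err)
	}

	data, err := accessor.Get(context.Background(), "blob-1")
	fmt.Println(string(data), err, c.Weight())
}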