Skip to content

Commit

Permalink
#46 passed new config everywhere
Browse files (browse the repository at this point in the history)
  • Loading branch information
roma-glushko committed Jan 14, 2024
1 parent dee8bbb commit 3965f5c
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 14 deletions.
4 changes: 2 additions & 2 deletions docs/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,13 +338,13 @@ const docTemplate = `{
"routing.Strategy": {
"type": "string",
"enum": [
"priority",
"least_latency",
"priority",
"round-robin"
],
"x-enum-varnames": [
"Priority",
"LeastLatency",
"Priority",
"RoundRobin"
]
},
Expand Down
4 changes: 2 additions & 2 deletions docs/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -335,13 +335,13 @@
"routing.Strategy": {
"type": "string",
"enum": [
"priority",
"least_latency",
"priority",
"round-robin"
],
"x-enum-varnames": [
"Priority",
"LeastLatency",
"Priority",
"RoundRobin"
]
},
Expand Down
4 changes: 2 additions & 2 deletions docs/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,13 @@ definitions:
type: object
routing.Strategy:
enum:
- priority
- least_latency
- priority
- round-robin
type: string
x-enum-varnames:
- Priority
- LeastLatency
- Priority
- RoundRobin
schemas.ChatMessage:
properties:
Expand Down
5 changes: 3 additions & 2 deletions pkg/providers/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package providers
import (
"errors"
"fmt"

"glide/pkg/routers/latency"

"glide/pkg/providers/clients"
Expand All @@ -18,7 +19,7 @@ var ErrProviderNotFound = errors.New("provider not found")
type LangModelConfig struct {
ID string `yaml:"id" json:"id" validate:"required"` // Model instance ID (unique in scope of the router)
Enabled bool `yaml:"enabled" json:"enabled"` // Is the model enabled?
ErrorBudget health.ErrorBudget `yaml:"error_budget" json:"error_budget" swaggertype:"primitive,string"`
ErrorBudget *health.ErrorBudget `yaml:"error_budget" json:"error_budget" swaggertype:"primitive,string"`
Latency *latency.Config `yaml:"latency" json:"latency"`
Client *clients.ClientConfig `yaml:"client" json:"client"`
OpenAI *openai.Config `yaml:"openai" json:"openai"`
Expand All @@ -43,7 +44,7 @@ func (c *LangModelConfig) ToModel(tel *telemetry.Telemetry) (*LangModel, error)
return nil, fmt.Errorf("error initing openai client: %v", err)
}

return NewLangModel(c.ID, client, c.ErrorBudget), nil
return NewLangModel(c.ID, client, *c.ErrorBudget, *c.Latency), nil
}

return nil, ErrProviderNotFound
Expand Down
4 changes: 2 additions & 2 deletions pkg/providers/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ type LangModel struct {
latency *latency.MovingAverage
}

func NewLangModel(modelID string, client LangModelProvider, budget health.ErrorBudget) *LangModel {
func NewLangModel(modelID string, client LangModelProvider, budget health.ErrorBudget, latencyConfig latency.Config) *LangModel {
return &LangModel{
modelID: modelID,
client: client,
rateLimit: health.NewRateLimitTracker(),
errorBudget: health.NewTokenBucket(budget.TimePerTokenMicro(), budget.Budget()),
latency: latency.NewMovingAverage(0.05, 3), // TODO: set from configs
latency: latency.NewMovingAverage(latencyConfig.Decay, latencyConfig.WarmupSamples),
}
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/routers/health/error_budget.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ func NewErrorBudget(budget int, unit Unit) *ErrorBudget {
}
}

func DefaultErrorBudget() ErrorBudget {
return ErrorBudget{
func DefaultErrorBudget() *ErrorBudget {
return &ErrorBudget{
budget: 10,
unit: MIN,
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/routers/latency/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import "time"

// Config defines the settings for moving-average latency calculations.
type Config struct {
Decay float32 `yaml:"decay" json:"decay"` // Weight of new latency measurements
WarmupSamples int `yaml:"warmup_samples" json:"warmup_samples"` // The number of latency probes required to init moving average
Decay float64 `yaml:"decay" json:"decay"` // Weight of new latency measurements
WarmupSamples uint8 `yaml:"warmup_samples" json:"warmup_samples"` // The number of latency probes required to init moving average
UpdateInterval *time.Duration `yaml:"update_interval,omitempty" json:"update_interval" swaggertype:"primitive,string"` // How often gateway should probe models with not the lowest response latency
}

Expand Down
19 changes: 19 additions & 0 deletions pkg/routers/router_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"testing"
"time"

"glide/pkg/routers/latency"

"glide/pkg/providers/clients"

"github.com/stretchr/testify/require"
Expand All @@ -18,16 +20,20 @@ import (

func TestLangRouter_Priority_PickFistHealthy(t *testing.T) {
budget := health.NewErrorBudget(3, health.SEC)
latConfig := latency.DefaultConfig()

langModels := []providers.LanguageModel{
providers.NewLangModel(
"first",
providers.NewProviderMock([]providers.ResponseMock{{Msg: "1"}, {Msg: "2"}}),
*budget,
*latConfig,
),
providers.NewLangModel(
"second",
providers.NewProviderMock([]providers.ResponseMock{{Msg: "1"}}),
*budget,
*latConfig,
),
}

Expand Down Expand Up @@ -59,21 +65,25 @@ func TestLangRouter_Priority_PickFistHealthy(t *testing.T) {

func TestLangRouter_Priority_PickThirdHealthy(t *testing.T) {
budget := health.NewErrorBudget(1, health.SEC)
latConfig := latency.DefaultConfig()
langModels := []providers.LanguageModel{
providers.NewLangModel(
"first",
providers.NewProviderMock([]providers.ResponseMock{{Err: &ErrNoModelAvailable}, {Msg: "3"}}),
*budget,
*latConfig,
),
providers.NewLangModel(
"second",
providers.NewProviderMock([]providers.ResponseMock{{Err: &ErrNoModelAvailable}, {Msg: "4"}}),
*budget,
*latConfig,
),
providers.NewLangModel(
"third",
providers.NewProviderMock([]providers.ResponseMock{{Msg: "1"}, {Msg: "2"}}),
*budget,
*latConfig,
),
}

Expand Down Expand Up @@ -107,16 +117,19 @@ func TestLangRouter_Priority_PickThirdHealthy(t *testing.T) {

func TestLangRouter_Priority_SuccessOnRetry(t *testing.T) {
budget := health.NewErrorBudget(1, health.MILLI)
latConfig := latency.DefaultConfig()
langModels := []providers.LanguageModel{
providers.NewLangModel(
"first",
providers.NewProviderMock([]providers.ResponseMock{{Err: &ErrNoModelAvailable}, {Msg: "2"}}),
*budget,
*latConfig,
),
providers.NewLangModel(
"second",
providers.NewProviderMock([]providers.ResponseMock{{Err: &ErrNoModelAvailable}, {Msg: "1"}}),
*budget,
*latConfig,
),
}

Expand All @@ -143,16 +156,19 @@ func TestLangRouter_Priority_SuccessOnRetry(t *testing.T) {

func TestLangRouter_Priority_UnhealthyModelInThePool(t *testing.T) {
budget := health.NewErrorBudget(1, health.MIN)
latConfig := latency.DefaultConfig()
langModels := []providers.LanguageModel{
providers.NewLangModel(
"first",
providers.NewProviderMock([]providers.ResponseMock{{Err: &clients.ErrProviderUnavailable}, {Msg: "3"}}),
*budget,
*latConfig,
),
providers.NewLangModel(
"second",
providers.NewProviderMock([]providers.ResponseMock{{Msg: "1"}, {Msg: "2"}}),
*budget,
*latConfig,
),
}

Expand Down Expand Up @@ -181,16 +197,19 @@ func TestLangRouter_Priority_UnhealthyModelInThePool(t *testing.T) {

func TestLangRouter_Priority_AllModelsUnavailable(t *testing.T) {
budget := health.NewErrorBudget(1, health.SEC)
latConfig := latency.DefaultConfig()
langModels := []providers.LanguageModel{
providers.NewLangModel(
"first",
providers.NewProviderMock([]providers.ResponseMock{{Err: &ErrNoModelAvailable}, {Err: &ErrNoModelAvailable}}),
*budget,
*latConfig,
),
providers.NewLangModel(
"second",
providers.NewProviderMock([]providers.ResponseMock{{Err: &ErrNoModelAvailable}, {Err: &ErrNoModelAvailable}}),
*budget,
*latConfig,
),
}

Expand Down

0 comments on commit 3965f5c

Please sign in to comment.