From dee8bbb7cfd85196202b6f756dddf8b9b2aff7a0 Mon Sep 17 00:00:00 2001 From: Roman Glushko Date: Sun, 14 Jan 2024 18:52:02 +0200 Subject: [PATCH] #46 Defined latency measurement config --- docs/docs.go | 26 +++++++++++++++++++++++--- docs/swagger.json | 26 +++++++++++++++++++++++--- docs/swagger.yaml | 21 ++++++++++++++++++--- pkg/providers/clients/config.go | 2 +- pkg/providers/config.go | 3 +++ pkg/routers/latency/config.go | 20 ++++++++++++++++++++ 6 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 pkg/routers/latency/config.go diff --git a/docs/docs.go b/docs/docs.go index c660eaeb..15295761 100644 --- a/docs/docs.go +++ b/docs/docs.go @@ -129,7 +129,7 @@ const docTemplate = `{ "type": "object", "properties": { "timeout": { - "type": "integer" + "type": "string" } } }, @@ -160,6 +160,23 @@ const docTemplate = `{ } } }, + "latency.Config": { + "type": "object", + "properties": { + "decay": { + "description": "Weight of new latency measurements", + "type": "number" + }, + "update_interval": { + "description": "How often gateway should probe models with not the lowest response latency", + "type": "string" + }, + "warmup_samples": { + "description": "The number of latency probes required to init moving average", + "type": "integer" + } + } + }, "openai.Config": { "type": "object", "required": [ @@ -253,6 +270,9 @@ const docTemplate = `{ "description": "Model instance ID (unique in scope of the router)", "type": "string" }, + "latency": { + "$ref": "#/definitions/latency.Config" + }, "openai": { "$ref": "#/definitions/openai.Config" } @@ -318,13 +338,13 @@ const docTemplate = `{ "routing.Strategy": { "type": "string", "enum": [ - "least_latency", "priority", + "least_latency", "round-robin" ], "x-enum-varnames": [ - "LeastLatency", "Priority", + "LeastLatency", "RoundRobin" ] }, diff --git a/docs/swagger.json b/docs/swagger.json index af4ad040..3f15d316 100644 --- a/docs/swagger.json +++ b/docs/swagger.json @@ -126,7 +126,7 @@ "type": "object", 
"properties": { "timeout": { - "type": "integer" + "type": "string" } } }, @@ -157,6 +157,23 @@ } } }, + "latency.Config": { + "type": "object", + "properties": { + "decay": { + "description": "Weight of new latency measurements", + "type": "number" + }, + "update_interval": { + "description": "How often gateway should probe models with not the lowest response latency", + "type": "string" + }, + "warmup_samples": { + "description": "The number of latency probes required to init moving average", + "type": "integer" + } + } + }, "openai.Config": { "type": "object", "required": [ @@ -250,6 +267,9 @@ "description": "Model instance ID (unique in scope of the router)", "type": "string" }, + "latency": { + "$ref": "#/definitions/latency.Config" + }, "openai": { "$ref": "#/definitions/openai.Config" } @@ -315,13 +335,13 @@ "routing.Strategy": { "type": "string", "enum": [ - "least_latency", "priority", + "least_latency", "round-robin" ], "x-enum-varnames": [ - "LeastLatency", "Priority", + "LeastLatency", "RoundRobin" ] }, diff --git a/docs/swagger.yaml b/docs/swagger.yaml index f5bf313e..ad333c39 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -3,7 +3,7 @@ definitions: clients.ClientConfig: properties: timeout: - type: integer + type: string type: object http.ErrorSchema: properties: @@ -22,6 +22,19 @@ definitions: $ref: '#/definitions/routers.LangRouterConfig' type: array type: object + latency.Config: + properties: + decay: + description: Weight of new latency measurements + type: number + update_interval: + description: How often gateway should probe models with not the lowest response + latency + type: string + warmup_samples: + description: The number of latency probes required to init moving average + type: integer + type: object openai.Config: properties: baseUrl: @@ -83,6 +96,8 @@ definitions: id: description: Model instance ID (unique in scope of the router) type: string + latency: + $ref: '#/definitions/latency.Config' openai: $ref: 
'#/definitions/openai.Config' required: @@ -126,13 +141,13 @@ definitions: type: object routing.Strategy: enum: - - least_latency - priority + - least_latency - round-robin type: string x-enum-varnames: - - LeastLatency - Priority + - LeastLatency - RoundRobin schemas.ChatMessage: properties: diff --git a/pkg/providers/clients/config.go b/pkg/providers/clients/config.go index 9fca01e8..d01a2ab1 100644 --- a/pkg/providers/clients/config.go +++ b/pkg/providers/clients/config.go @@ -3,7 +3,7 @@ package clients import "time" type ClientConfig struct { - Timeout *time.Duration `yaml:"timeout,omitempty" json:"timeout" swaggertype:"primitive,integer"` + Timeout *time.Duration `yaml:"timeout,omitempty" json:"timeout" swaggertype:"primitive,string"` } func DefaultClientConfig() *ClientConfig { diff --git a/pkg/providers/config.go b/pkg/providers/config.go index 964ac52d..0eb6a4c9 100644 --- a/pkg/providers/config.go +++ b/pkg/providers/config.go @@ -3,6 +3,7 @@ package providers import ( "errors" "fmt" + "glide/pkg/routers/latency" "glide/pkg/providers/clients" @@ -18,6 +19,7 @@ type LangModelConfig struct { ID string `yaml:"id" json:"id" validate:"required"` // Model instance ID (unique in scope of the router) Enabled bool `yaml:"enabled" json:"enabled"` // Is the model enabled? 
ErrorBudget health.ErrorBudget `yaml:"error_budget" json:"error_budget" swaggertype:"primitive,string"` + Latency *latency.Config `yaml:"latency" json:"latency"` Client *clients.ClientConfig `yaml:"client" json:"client"` OpenAI *openai.Config `yaml:"openai" json:"openai"` // Add other providers like @@ -30,6 +32,7 @@ func DefaultLangModelConfig() *LangModelConfig { Enabled: true, Client: clients.DefaultClientConfig(), ErrorBudget: health.DefaultErrorBudget(), + Latency: latency.DefaultConfig(), } } diff --git a/pkg/routers/latency/config.go b/pkg/routers/latency/config.go new file mode 100644 index 00000000..d6459a1c --- /dev/null +++ b/pkg/routers/latency/config.go @@ -0,0 +1,20 @@ +package latency + +import "time" + +// Config defines settings for moving average latency calculations +type Config struct { + Decay float32 `yaml:"decay" json:"decay"` // Weight of new latency measurements + WarmupSamples int `yaml:"warmup_samples" json:"warmup_samples"` // The number of latency probes required to init moving average + UpdateInterval *time.Duration `yaml:"update_interval,omitempty" json:"update_interval" swaggertype:"primitive,string"` // How often gateway should probe models with not the lowest response latency +} + +func DefaultConfig() *Config { + defaultUpdateInterval := 30 * time.Second + + return &Config{ + Decay: 0.06, + WarmupSamples: 3, + UpdateInterval: &defaultUpdateInterval, + } +}