Skip to content

Commit

Permalink
#46 Defined latency measurement config
Browse files Browse the repository at this point in the history
  • Loading branch information
roma-glushko committed Jan 14, 2024
1 parent 36b1fad commit dee8bbb
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 10 deletions.
26 changes: 23 additions & 3 deletions docs/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ const docTemplate = `{
"type": "object",
"properties": {
"timeout": {
"type": "integer"
"type": "string"
}
}
},
Expand Down Expand Up @@ -160,6 +160,23 @@ const docTemplate = `{
}
}
},
"latency.Config": {
"type": "object",
"properties": {
"decay": {
"description": "Weight of new latency measurements",
"type": "number"
},
"update_interval": {
"description": "How often gateway should probe models with not the lowest response latency",
"type": "string"
},
"warmup_samples": {
"description": "The number of latency probes required to init moving average",
"type": "integer"
}
}
},
"openai.Config": {
"type": "object",
"required": [
Expand Down Expand Up @@ -253,6 +270,9 @@ const docTemplate = `{
"description": "Model instance ID (unique in scope of the router)",
"type": "string"
},
"latency": {
"$ref": "#/definitions/latency.Config"
},
"openai": {
"$ref": "#/definitions/openai.Config"
}
Expand Down Expand Up @@ -318,13 +338,13 @@ const docTemplate = `{
"routing.Strategy": {
"type": "string",
"enum": [
"least_latency",
"priority",
"least_latency",
"round-robin"
],
"x-enum-varnames": [
"LeastLatency",
"Priority",
"LeastLatency",
"RoundRobin"
]
},
Expand Down
26 changes: 23 additions & 3 deletions docs/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
"type": "object",
"properties": {
"timeout": {
"type": "integer"
"type": "string"
}
}
},
Expand Down Expand Up @@ -157,6 +157,23 @@
}
}
},
"latency.Config": {
"type": "object",
"properties": {
"decay": {
"description": "Weight of new latency measurements",
"type": "number"
},
"update_interval": {
"description": "How often gateway should probe models with not the lowest response latency",
"type": "string"
},
"warmup_samples": {
"description": "The number of latency probes required to init moving average",
"type": "integer"
}
}
},
"openai.Config": {
"type": "object",
"required": [
Expand Down Expand Up @@ -250,6 +267,9 @@
"description": "Model instance ID (unique in scope of the router)",
"type": "string"
},
"latency": {
"$ref": "#/definitions/latency.Config"
},
"openai": {
"$ref": "#/definitions/openai.Config"
}
Expand Down Expand Up @@ -315,13 +335,13 @@
"routing.Strategy": {
"type": "string",
"enum": [
"least_latency",
"priority",
"least_latency",
"round-robin"
],
"x-enum-varnames": [
"LeastLatency",
"Priority",
"LeastLatency",
"RoundRobin"
]
},
Expand Down
21 changes: 18 additions & 3 deletions docs/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ definitions:
clients.ClientConfig:
properties:
timeout:
type: integer
type: string
type: object
http.ErrorSchema:
properties:
Expand All @@ -22,6 +22,19 @@ definitions:
$ref: '#/definitions/routers.LangRouterConfig'
type: array
type: object
latency.Config:
properties:
decay:
description: Weight of new latency measurements
type: number
update_interval:
description: How often gateway should probe models with not the lowest response
latency
type: string
warmup_samples:
description: The number of latency probes required to init moving average
type: integer
type: object
openai.Config:
properties:
baseUrl:
Expand Down Expand Up @@ -83,6 +96,8 @@ definitions:
id:
description: Model instance ID (unique in scope of the router)
type: string
latency:
$ref: '#/definitions/latency.Config'
openai:
$ref: '#/definitions/openai.Config'
required:
Expand Down Expand Up @@ -126,13 +141,13 @@ definitions:
type: object
routing.Strategy:
enum:
- least_latency
- priority
- least_latency
- round-robin
type: string
x-enum-varnames:
- LeastLatency
- Priority
- LeastLatency
- RoundRobin
schemas.ChatMessage:
properties:
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/clients/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package clients
import "time"

// ClientConfig holds the client-level settings declared in this struct; at present
// that is a single optional timeout.
//
// Timeout is a pointer, so an unset value is nil, and the yaml tag drops it from
// output when empty. The swaggertype tag overrides the type reported for the field
// in the generated Swagger documentation.
type ClientConfig struct {
Timeout *time.Duration `yaml:"timeout,omitempty" json:"timeout" swaggertype:"primitive,integer"`
Timeout *time.Duration `yaml:"timeout,omitempty" json:"timeout" swaggertype:"primitive,string"`
}

func DefaultClientConfig() *ClientConfig {
Expand Down
3 changes: 3 additions & 0 deletions pkg/providers/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package providers
import (
"errors"
"fmt"
"glide/pkg/routers/latency"

"glide/pkg/providers/clients"

Expand All @@ -18,6 +19,7 @@ type LangModelConfig struct {
ID string `yaml:"id" json:"id" validate:"required"` // Model instance ID (unique in scope of the router)
Enabled bool `yaml:"enabled" json:"enabled"` // Is the model enabled?
ErrorBudget health.ErrorBudget `yaml:"error_budget" json:"error_budget" swaggertype:"primitive,string"`
Latency *latency.Config `yaml:"latency" json:"latency"`
Client *clients.ClientConfig `yaml:"client" json:"client"`
OpenAI *openai.Config `yaml:"openai" json:"openai"`
// Add other providers like
Expand All @@ -30,6 +32,7 @@ func DefaultLangModelConfig() *LangModelConfig {
Enabled: true,
Client: clients.DefaultClientConfig(),
ErrorBudget: health.DefaultErrorBudget(),
Latency: latency.DefaultConfig(),
}
}

Expand Down
20 changes: 20 additions & 0 deletions pkg/routers/latency/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package latency

import "time"

// Config groups the settings used for moving average latency calculations.
type Config struct {
	// Weight of new latency measurements
	Decay float32 `yaml:"decay" json:"decay"`

	// The number of latency probes required to initialize the moving average
	WarmupSamples int `yaml:"warmup_samples" json:"warmup_samples"`

	// How often the gateway should probe models that do not have the lowest response latency
	UpdateInterval *time.Duration `yaml:"update_interval,omitempty" json:"update_interval" swaggertype:"primitive,string"`
}

// DefaultConfig returns a freshly allocated Config populated with the default
// latency settings: a decay of 0.06, three warmup samples, and a 30 second
// update interval.
func DefaultConfig() *Config {
	cfg := new(Config)
	cfg.Decay = 0.06
	cfg.WarmupSamples = 3

	// UpdateInterval is a pointer field, so the default needs an addressable local.
	interval := 30 * time.Second
	cfg.UpdateInterval = &interval

	return cfg
}

0 comments on commit dee8bbb

Please sign in to comment.