Commit 7186407
Fix issue when generating vLLM distros
Signed-off-by: Yuan Tang <[email protected]>
terrytangyuan committed Jan 13, 2025
1 parent 314806c commit 7186407
Showing 3 changed files with 14 additions and 46 deletions.
35 changes: 9 additions & 26 deletions llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -1,13 +1,13 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: remote-vllm
apis:
- agents
- inference
- memory
- safety
- telemetry
- tool_runtime
providers:
  inference:
  - provider_id: vllm-inference
@@ -52,50 +52,33 @@ providers:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  provider_model_id: null
  model_type: llm
- metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: vllm-safety
  provider_model_id: null
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  provider_model_id: null
  model_type: embedding
shields:
- shield_id: ${env.SAFETY_MODEL}
- params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::memory
  provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
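
A note on the ${env.NAME:default} placeholders that appear throughout these run configs (INFERENCE_MODEL, SAFETY_MODEL, SQLITE_STORE_DIR, and so on): the text after the colon is the fallback used when the environment variable is unset. The sketch below is only an illustration of how such a placeholder could be expanded, assuming simple string substitution; it is not the resolver llama-stack actually ships.

# Hypothetical resolver for "${env.NAME:default}" placeholders; illustration only,
# not llama-stack's actual implementation.
import os
import re

_PLACEHOLDER = re.compile(
    r"\$\{env\.(?P<name>[A-Za-z_][A-Za-z0-9_]*)(?::(?P<default>[^}]*))?\}"
)

def resolve_env_placeholders(value: str) -> str:
    """Replace ${env.NAME:default} tokens with the env value, else the default."""
    def substitute(match: re.Match) -> str:
        name = match.group("name")
        default = match.group("default") or ""
        return os.environ.get(name, default)
    return _PLACEHOLDER.sub(substitute, value)

# With SQLITE_STORE_DIR unset, the registry path falls back to its default:
print(resolve_env_placeholders(
    "${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db"
))  # -> ~/.llama/distributions/remote-vllm/registry.db

If resolution works roughly this way, a stray closing brace inside a default ends the placeholder early and leaks the leftover characters into the rendered value, which appears to be the kind of breakage the one-character change to vllm.py below addresses.
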
23 changes: 4 additions & 19 deletions llama_stack/templates/remote-vllm/run.yaml
@@ -1,13 +1,13 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: remote-vllm
apis:
- agents
- inference
- memory
- safety
- telemetry
- tool_runtime
providers:
  inference:
  - provider_id: vllm-inference
@@ -46,39 +46,24 @@ providers:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  provider_model_id: null
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  provider_model_id: null
  model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups: []
2 changes: 1 addition & 1 deletion llama_stack/templates/remote-vllm/vllm.py
@@ -134,7 +134,7 @@ def get_distribution_template() -> DistributionTemplate:
"Inference model loaded into the vLLM server",
),
"VLLM_URL": (
"http://host.docker.internal:5100}/v1",
"http://host.docker.internal:5100/v1",
"URL of the vLLM server with the main inference model",
),
"MAX_TOKENS": (
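
The vllm.py change itself is a one-character fix: the old VLLM_URL default carried a stray closing brace, so the default baked into generated artifacts was not a well-formed URL. A quick before-and-after check (illustrative only, not part of the commit):

# The stray "}" ends up in the authority portion of the URL, so standard URL
# parsing cannot extract a valid port from the old default.
from urllib.parse import urlparse

broken = "http://host.docker.internal:5100}/v1"
fixed = "http://host.docker.internal:5100/v1"

try:
    urlparse(broken).port  # "5100}" is not a valid integer port
except ValueError as exc:
    print("broken default rejected:", exc)

print("fixed default port:", urlparse(fixed).port)  # -> 5100

Dropping the brace restores a usable default for the generated vLLM distros.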
