Allow the Ollama microservice to be configurable with different models (#280)

Signed-off-by: lvliang-intel <[email protected]>
Co-authored-by: Sihan Chen <[email protected]>
lvliang-intel and Spycsh authored Jul 15, 2024
1 parent dd939c5 · commit 2458e2f
Showing 3 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions comps/cores/proto/docarray.py
@@ -77,6 +77,7 @@ class RerankedDoc(BaseDoc):
 
 
 class LLMParamsDoc(BaseDoc):
+    model: Optional[str] = None  # for openai and ollama
     query: str
     max_new_tokens: int = 1024
     top_k: int = 10
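The new field defaults to None, so existing clients that never send a model keep working. As a minimal sketch (not the full class, which carries further sampling fields cut off below the visible hunk), here is how the updated schema accepts a per-request model:

```python
# Minimal sketch of the updated schema; only the fields visible in the diff above.
from typing import Optional

from docarray import BaseDoc


class LLMParamsDoc(BaseDoc):
    model: Optional[str] = None  # for openai and ollama
    query: str
    max_new_tokens: int = 1024
    top_k: int = 10


# "model" may be omitted entirely; it then stays None and the serving
# code decides what to do with the missing value.
doc = LLMParamsDoc(query="What is Deep Learning?", model="llama3")
print(doc.model)  # llama3
```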
2 changes: 1 addition & 1 deletion comps/llms/text-generation/ollama/README.md
@@ -62,5 +62,5 @@ docker run --network host opea/llm-ollama:latest
 # Consume the Ollama Microservice
 
 ```bash
-curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"query":"What is Deep Learning?","max_new_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' -H 'Content-Type: application/json'
+curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"model": "llama3", "query":"What is Deep Learning?","max_new_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' -H 'Content-Type: application/json'
 ```
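For comparison, a hypothetical Python equivalent of the curl call above, assuming the requests package is installed; streaming is disabled here so the whole response body can be printed in one shot:

```python
import requests

# Same payload as the curl example; "llama3" must already be pulled
# into the Ollama server that backs the microservice.
payload = {
    "model": "llama3",
    "query": "What is Deep Learning?",
    "max_new_tokens": 32,
    "top_k": 10,
    "top_p": 0.95,
    "typical_p": 0.95,
    "temperature": 0.01,
    "repetition_penalty": 1.03,
    "streaming": False,  # the curl example streams; set True to match it
}

resp = requests.post("http://127.0.0.1:9000/v1/chat/completions", json=payload)
print(resp.text)
```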
2 changes: 1 addition & 1 deletion comps/llms/text-generation/ollama/llm.py
@@ -21,7 +21,7 @@
 def llm_generate(input: LLMParamsDoc):
     ollama = Ollama(
         base_url=ollama_endpoint,
-        model="llama3",
+        model=input.model,
         num_predict=input.max_new_tokens,
         top_k=input.top_k,
         top_p=input.top_p,
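Read as a whole, the handler now forwards whatever model the request names instead of always asking for "llama3". Below is a hypothetical standalone reconstruction; the diff hides everything after top_p, so the endpoint variable, the parameter tail, and the final call are assumptions modeled on the request fields in the README example, with the langchain_community Ollama wrapper assumed as the client:

```python
import os

from langchain_community.llms import Ollama

from comps.cores.proto.docarray import LLMParamsDoc

# Assumption: the endpoint comes from an environment variable with a local default.
ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")


def llm_generate(input: LLMParamsDoc) -> str:
    ollama = Ollama(
        base_url=ollama_endpoint,
        model=input.model,  # per-request model instead of the hard-coded "llama3"
        num_predict=input.max_new_tokens,
        top_k=input.top_k,
        top_p=input.top_p,
        # Everything below this line is hidden in the diff and assumed here.
        repeat_penalty=input.repetition_penalty,
        temperature=input.temperature,
    )
    return ollama.invoke(input.query)
```

Note that nothing in the visible hunk supplies a fallback when input.model is None, so under this sketch callers should always send a model name.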
