From def39cfcdccf4993e9fbdf3c9e8a3bf00ecafeba Mon Sep 17 00:00:00 2001
From: RuijingGuo
Date: Tue, 22 Oct 2024 23:22:48 -0700
Subject: [PATCH] setup ollama service in aipc docker compose (#1008)

Signed-off-by: Guo Ruijing
---
 .../docker_compose/intel/cpu/aipc/README.md  | 99 -------------------
 .../intel/cpu/aipc/compose.yaml              | 18 ++++
 2 files changed, 18 insertions(+), 99 deletions(-)

diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/README.md b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
index 35f9093b0..a438103e7 100644
--- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
@@ -2,105 +2,6 @@
 
 This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
 
-## Prerequisites
-
-We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
-
-Please follow the instructions to set up Ollama on your PC. This will set the entrypoint needed for the Ollama to suit the ChatQnA examples.
-
-### Set Up Ollama LLM Service
-
-#### Install Ollama Service
-
-Install Ollama service with one command:
-
-```
-curl -fsSL https://ollama.com/install.sh | sh
-```
-
-#### Set Ollama Service Configuration
-
-Ollama Service Configuration file is /etc/systemd/system/ollama.service. Edit the file to set OLLAMA_HOST environment.
-Replace **<host_ip>** with your host IPV4 (please use external public IP). For example the host_ip is 10.132.x.y, then `Environment="OLLAMA_HOST=10.132.x.y:11434"`.
-
-```
-Environment="OLLAMA_HOST=host_ip:11434"
-```
-
-#### Set https_proxy environment for Ollama
-
-If your system access network through proxy, add https_proxy in Ollama Service Configuration file
-
-```
-Environment="https_proxy=Your_HTTPS_Proxy"
-```
-
-#### Restart Ollama services
-
-```
-$ sudo systemctl daemon-reload
-$ sudo systemctl restart ollama.service
-```
-
-#### Check the service started
-
-```
-netstat -tuln | grep 11434
-```
-
-The output are:
-
-```
-tcp        0      0 10.132.x.y:11434      0.0.0.0:*               LISTEN
-```
-
-#### Pull Ollama LLM model
-
-Run the command to download LLM models. The <host_ip> is the one set in [Ollama Service Configuration](#Set-Ollama-Service-Configuration)
-
-```
-export host_ip=<host_ip>
-export OLLAMA_HOST=http://${host_ip}:11434
-ollama pull llama3.2
-```
-
-After downloaded the models, you can list the models by `ollama list`.
-
-The output should be similar to the following:
-
-```
-NAME               ID              SIZE      MODIFIED
-llama3.2:latest    a80c4f17acd5    2.0 GB    2 minutes ago
-```
-
-### Consume Ollama LLM Service
-
-Access ollama service to verify that the ollama is functioning correctly.
-
-```bash
-curl http://${host_ip}:11434/v1/chat/completions \
-    -H "Content-Type: application/json" \
-    -d '{
-        "model": "llama3.2",
-        "messages": [
-            {
-                "role": "system",
-                "content": "You are a helpful assistant."
-            },
-            {
-                "role": "user",
-                "content": "Hello!"
-            }
-        ]
-    }'
-```
-
-The outputs are similar to these:
-
-```
-{"id":"chatcmpl-4","object":"chat.completion","created":1729232496,"model":"llama3.2","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"How can I assist you today? Are you looking for information, answers to a question, or just need someone to chat with? I'm here to help in any way I can."},"finish_reason":"stop"}],"usage":{"prompt_tokens":33,"completion_tokens":38,"total_tokens":71}}
-```
-
 ## 🚀 Build Docker Images
 
 First of all, you need to build Docker Images locally and install the python package of it.
diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
index 8f2832595..ef15837e0 100644
--- a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
@@ -72,6 +72,21 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
+  ollama-service:
+    image: ollama/ollama
+    container_name: ollama
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama:/root/.ollama
+    entrypoint: ["bash", "-c"]
+    command: ["ollama serve & sleep 10 && ollama run ${OLLAMA_MODEL} & wait"]
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      OLLAMA_MODEL: ${OLLAMA_MODEL}
+
   chatqna-aipc-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-aipc-backend-server
@@ -134,6 +149,9 @@ services:
     ipc: host
     restart: always
 
+volumes:
+  ollama:
+
 networks:
   default:
     driver: bridge
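Not part of the patch itself, but for context: once the stack is brought up with the updated compose.yaml, the new `ollama-service` container can be sanity-checked from the host using the same OpenAI-compatible endpoint the removed README documented. A minimal verification sketch, assuming `OLLAMA_MODEL` was exported before running `docker compose up -d` (e.g. `llama3.2`) and that port 11434 is reachable on the host:

```bash
# Verification sketch (not part of the patch). Assumes the compose stack is
# already running and OLLAMA_MODEL matches the value passed to compose.
export host_ip=<host_ip>          # replace with the host's IPv4 address
export OLLAMA_MODEL=llama3.2      # example model; must match the compose env

# The ollama container publishes 11434 on the host; after the initial model
# pull completes, query the OpenAI-compatible chat endpoint.
curl http://${host_ip}:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "'"${OLLAMA_MODEL}"'",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'
```

The `entrypoint`/`command` pair in the new service starts `ollama serve`, waits briefly, then runs `ollama run ${OLLAMA_MODEL}` so the model is pulled on first start, and the named `ollama` volume declared at the bottom of the file keeps downloaded models across container restarts.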