From def39cfcdccf4993e9fbdf3c9e8a3bf00ecafeba Mon Sep 17 00:00:00 2001
From: RuijingGuo
Date: Tue, 22 Oct 2024 23:22:48 -0700
Subject: [PATCH] setup ollama service in aipc docker compose (#1008)

Signed-off-by: Guo Ruijing
---
 .../docker_compose/intel/cpu/aipc/README.md  | 99 -------------------
 .../intel/cpu/aipc/compose.yaml              | 18 ++++
 2 files changed, 18 insertions(+), 99 deletions(-)

diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/README.md b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
index 35f9093b0..a438103e7 100644
--- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
@@ -2,105 +2,6 @@
 
 This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
 
-## Prerequisites
-
-We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
-
-Please follow the instructions to set up Ollama on your PC. This will set the entrypoint needed for the Ollama to suit the ChatQnA examples.
-
-### Set Up Ollama LLM Service
-
-#### Install Ollama Service
-
-Install Ollama service with one command:
-
-```
-curl -fsSL https://ollama.com/install.sh | sh
-```
-
-#### Set Ollama Service Configuration
-
-Ollama Service Configuration file is /etc/systemd/system/ollama.service. Edit the file to set OLLAMA_HOST environment.
-Replace **<host_ip>** with your host IPV4 (please use external public IP). For example the host_ip is 10.132.x.y, then `Environment="OLLAMA_HOST=10.132.x.y:11434"`.
-
-```
-Environment="OLLAMA_HOST=host_ip:11434"
-```
-
-#### Set https_proxy environment for Ollama
-
-If your system access network through proxy, add https_proxy in Ollama Service Configuration file
-
-```
-Environment="https_proxy=Your_HTTPS_Proxy"
-```
-
-#### Restart Ollama services
-
-```
-$ sudo systemctl daemon-reload
-$ sudo systemctl restart ollama.service
-```
-
-#### Check the service started
-
-```
-netstat -tuln | grep 11434
-```
-
-The output are:
-
-```
-tcp        0      0 10.132.x.y:11434      0.0.0.0:*               LISTEN
-```
-
-#### Pull Ollama LLM model
-
-Run the command to download LLM models. The <host_ip> is the one set in [Ollama Service Configuration](#Set-Ollama-Service-Configuration)
-
-```
-export host_ip=<host_ip>
-export OLLAMA_HOST=http://${host_ip}:11434
-ollama pull llama3.2
-```
-
-After downloaded the models, you can list the models by `ollama list`.
-
-The output should be similar to the following:
-
-```
-NAME               ID              SIZE      MODIFIED
-llama3.2:latest    a80c4f17acd5    2.0 GB    2 minutes ago
-```
-
-### Consume Ollama LLM Service
-
-Access ollama service to verify that the ollama is functioning correctly.
-
-```bash
-curl http://${host_ip}:11434/v1/chat/completions \
-    -H "Content-Type: application/json" \
-    -d '{
-        "model": "llama3.2",
-        "messages": [
-            {
-                "role": "system",
-                "content": "You are a helpful assistant."
-            },
-            {
-                "role": "user",
-                "content": "Hello!"
-            }
-        ]
-    }'
-```
-
-The outputs are similar to these:
-
-```
-{"id":"chatcmpl-4","object":"chat.completion","created":1729232496,"model":"llama3.2","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"How can I assist you today? Are you looking for information, answers to a question, or just need someone to chat with? I'm here to help in any way I can."},"finish_reason":"stop"}],"usage":{"prompt_tokens":33,"completion_tokens":38,"total_tokens":71}}
-```
-
 ## 🚀 Build Docker Images
 
 First of all, you need to build Docker Images locally and install the python package of it.
diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
index 8f2832595..ef15837e0 100644
--- a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
@@ -72,6 +72,21 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
+  ollama-service:
+    image: ollama/ollama
+    container_name: ollama
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama:/root/.ollama
+    entrypoint: ["bash", "-c"]
+    command: ["ollama serve & sleep 10 && ollama run ${OLLAMA_MODEL} & wait"]
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      OLLAMA_MODEL: ${OLLAMA_MODEL}
+
   chatqna-aipc-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-aipc-backend-server
@@ -134,6 +149,9 @@ services:
     ipc: host
     restart: always
 
+volumes:
+  ollama:
+
 networks:
   default:
     driver: bridge
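Not part of the patch itself, but for context: once the stack is brought up with the updated compose.yaml, the new `ollama-service` container can be sanity-checked from the host using the same OpenAI-compatible endpoint the removed README documented. A minimal verification sketch, assuming `OLLAMA_MODEL` was exported before running `docker compose up -d` (e.g. `llama3.2`) and that port 11434 is reachable on the host:

```bash
# Verification sketch (not part of the patch). Assumes the compose stack is
# already running and OLLAMA_MODEL matches the value passed to compose.
export host_ip=<host_ip>          # replace with the host's IPv4 address
export OLLAMA_MODEL=llama3.2      # example model; must match the compose env

# The ollama container publishes 11434 on the host; after the initial model
# pull completes, query the OpenAI-compatible chat endpoint.
curl http://${host_ip}:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "'"${OLLAMA_MODEL}"'",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'
```

The `entrypoint`/`command` pair in the new service starts `ollama serve`, waits briefly, then runs `ollama run ${OLLAMA_MODEL}` so the model is pulled on first start, and the named `ollama` volume declared at the bottom of the file keeps downloaded models across container restarts.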