Support Llama3.2 vision and vision guard model (opea-project#753)
* Support llama3.2 models

Signed-off-by: lvliang-intel <[email protected]>

* fix issues

Signed-off-by: lvliang-intel <[email protected]>

* update code and doc

Signed-off-by: lvliang-intel <[email protected]>

* add llama vision guard support

Signed-off-by: lvliang-intel <[email protected]>

* add llama guard prompt format utils

Signed-off-by: lvliang-intel <[email protected]>

* add tp support

Signed-off-by: lvliang-intel <[email protected]>

* add wheel

Signed-off-by: lvliang-intel <[email protected]>

* fix accuracy issue

Signed-off-by: lvliang-intel <[email protected]>

* update tp service code

Signed-off-by: lvliang-intel <[email protected]>

* update dockerfile

Signed-off-by: lvliang-intel <[email protected]>

* support lvm tp serving

Signed-off-by: letonghan <[email protected]>

* update dockerfile

Signed-off-by: lvliang-intel <[email protected]>

* add run tp script

Signed-off-by: lvliang-intel <[email protected]>

* fix max_new_tokens

Signed-off-by: lvliang-intel <[email protected]>

* update run tp script

Signed-off-by: lvliang-intel <[email protected]>

* refine code and doc

Signed-off-by: lvliang-intel <[email protected]>

* install transformers from local wheel

Signed-off-by: lvliang-intel <[email protected]>

* update code using official transformers

Signed-off-by: lvliang-intel <[email protected]>

* remove unnecessary code

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove blank line

Signed-off-by: lvliang-intel <[email protected]>

* fix precommit issues

Signed-off-by: lvliang-intel <[email protected]>

* fix cd issue

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: lvliang-intel <[email protected]>
Signed-off-by: letonghan <[email protected]>
Co-authored-by: letonghan <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
3 people authored Sep 30, 2024
1 parent 405a2fc commit 534c227
Showing 30 changed files with 7,190 additions and 13 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/docker/compose/lvms-compose-cd.yaml
@@ -11,3 +11,15 @@ services:
    build:
      dockerfile: comps/lvms/predictionguard/Dockerfile
    image: ${REGISTRY:-opea}/lvm-predictionguard:${TAG:-latest}
  lvm-llama-vision:
    build:
      dockerfile: comps/lvms/llama-vision/Dockerfile
    image: ${REGISTRY:-opea}/lvm-llama-vision:${TAG:-latest}
  lvm-llama-vision-tp:
    build:
      dockerfile: comps/lvms/llama-vision/Dockerfile_tp
    image: ${REGISTRY:-opea}/lvm-llama-vision-tp:${TAG:-latest}
  lvm-llama-vision-guard:
    build:
      dockerfile: comps/lvms/llama-vision/Dockerfile_guard
    image: ${REGISTRY:-opea}/lvm-llama-vision-guard:${TAG:-latest}
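
The three new Compose services above are build-only targets used by the CD image-publishing workflow. As a minimal sketch (assuming a Docker Compose v2 CLI and that the file's build context, defined above this hunk, resolves to the repository root), one of the images can be built locally straight from this file:

```bash
# Build a newly registered LVM image from the CD compose file
# (REGISTRY and TAG default to "opea" and "latest" if unset).
docker compose -f .github/workflows/docker/compose/lvms-compose-cd.yaml build lvm-llama-vision
```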
2 changes: 1 addition & 1 deletion comps/cores/mega/manifests_exporter.py
@@ -266,7 +266,7 @@ def create_llm_dependency_deployment_and_service(resource_requirements=None, rep
    deployment = create_k8s_resources(
        name="llm-dependency-deploy",
        replicas=7,
        image="ghcr.io/huggingface/tgi-gaudi:2.0.4",
        image="ghcr.io/huggingface/tgi-gaudi:2.0.5",
        container_ports=[80],
        node_selector={"node-type": "chatqna-opea"},
        resources=resource_requirements,
@@ -10,7 +10,7 @@ services:
- "6337:6337"
- "6338:6338"
tgi_gaudi_service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-service
ports:
- "8088:80"
4 changes: 2 additions & 2 deletions comps/guardrails/llama_guard/langchain/README.md
@@ -36,8 +36,8 @@ pip install -r requirements.txt
export HF_TOKEN=${your_hf_api_token}
volume=$PWD/data
model_id="meta-llama/Meta-Llama-Guard-2-8B"
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048
```

### 1.3 Verify the TGI Gaudi Service
@@ -5,7 +5,7 @@ version: "3.8"

services:
  tgi_gaudi_service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    container_name: tgi-service
    ports:
      - "8088:80"
4 changes: 2 additions & 2 deletions comps/llms/text-generation/tgi/launch_tgi_service.sh
@@ -31,9 +31,9 @@ volume=$PWD/data

# Build the Docker run command based on the number of cards
if [ "$num_cards" -eq 1 ]; then
docker_cmd="docker run -d --name="ChatQnA_server" -p $port_number:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model_name"
docker_cmd="docker run -d --name="ChatQnA_server" -p $port_number:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_name"
else
docker_cmd="docker run -d --name="ChatQnA_server" -p $port_number:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model_name --sharded true --num-shard $num_cards"
docker_cmd="docker run -d --name="ChatQnA_server" -p $port_number:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.5 --max-input-tokens 4096 --max-total-tokens 8192 --model-id $model_name --sharded true --num-shard $num_cards"
fi

# Execute the Docker run command
32 changes: 32 additions & 0 deletions comps/lvms/llama-vision/Dockerfile
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.17.1/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest as hpu

ENV LANG=en_US.UTF-8

RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
git-lfs \
libgl1-mesa-glx \
libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

USER user

RUN git lfs install

COPY comps /home/user/comps

RUN cd /home/user/comps/lvms/llama-vision/ && \
pip install --no-cache-dir -r requirements.txt && \
pip install --upgrade Pillow

ENV PYTHONPATH=/root:/home/user

WORKDIR /home/user/comps/lvms/llama-vision/

ENTRYPOINT ["python", "lvm.py"]
32 changes: 32 additions & 0 deletions comps/lvms/llama-vision/Dockerfile_guard
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.17.1/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest as hpu

ENV LANG=en_US.UTF-8

RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
git-lfs \
libgl1-mesa-glx \
libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

USER user

RUN git lfs install

COPY comps /home/user/comps

RUN cd /home/user/comps/lvms/llama-vision/ && \
pip install --no-cache-dir -r requirements.txt && \
pip install --upgrade Pillow

ENV PYTHONPATH=/root:/home/user

WORKDIR /home/user/comps/lvms/llama-vision/

ENTRYPOINT ["python", "lvm_guard.py"]
34 changes: 34 additions & 0 deletions comps/lvms/llama-vision/Dockerfile_tp
@@ -0,0 +1,34 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.17.1/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest as hpu

ENV LANG=en_US.UTF-8

RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
git-lfs \
libgl1-mesa-glx \
libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

RUN git lfs install

COPY comps /home/user/comps

RUN pip install git+https://github.com/HabanaAI/[email protected]
RUN pip install git+https://github.com/huggingface/[email protected]

RUN cd /home/user/comps/lvms/llama-vision/ && \
    pip install --no-cache-dir --upgrade pip && \
    bash update && \
    pip install -r /home/user/comps/lvms/llama-vision/requirements_tp.txt

ENV PYTHONPATH=/root:/home/user

WORKDIR /home/user/comps/lvms/llama-vision/

ENTRYPOINT ["bash", "run_tp.sh"]
72 changes: 72 additions & 0 deletions comps/lvms/llama-vision/README.md
@@ -0,0 +1,72 @@
# LVM Microservice

Visual Question Answering (VQA) is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using Llama Vision as the base large visual model. It accepts two inputs, a prompt and an image, and returns the answer to the prompt about the image.

## 🚀 Start Microservice with Docker

### Build Images

#### Build Llama Vision Model

```bash
cd ../../../
docker build -t opea/lvm-llama-vision:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile .
```

#### Build Llama Vision Model with DeepSpeed

If you need to build the image for the 90B model, use the following command:

```bash
docker build -t opea/lvm-llama-vision-tp:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile_tp .
```

#### Build Llama Vision Guard Model

```bash
cd ../../../
docker build -t opea/lvm-llama-vision-guard:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile_guard .
```

### Start Llama LVM Service

#### Start Llama Vision Model Service

```bash
export HUGGINGFACEHUB_API_TOKEN=${your_hf_token}
docker run -it -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLAMA_VISION_MODEL_ID="meta-llama/Llama-3.2-11B-Vision-Instruct" -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host opea/lvm-llama-vision:latest
```

#### Start Llama Vision Model Service with DeepSpeed

If you need to run the 90B model, use the following command:

```bash
export HUGGINGFACEHUB_API_TOKEN=${your_hf_token}
export WORLD_SIZE=4
export no_proxy=localhost,127.0.0.1
docker run -it -p 9599:9599 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MODEL_ID="meta-llama/Llama-3.2-90B-Vision-Instruct" -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e WORLD_SIZE=$WORLD_SIZE --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice opea/lvm-llama-vision-tp:latest
```

#### Start Llama Vision Guard Model Service

```bash
export HUGGINGFACEHUB_API_TOKEN=${your_hf_token}
docker run -it -p 9499:9499 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLAMA_VISION_MODEL_ID="meta-llama/Llama-Guard-3-11B-Vision" -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host opea/lvm-llama-vision-guard:latest
```

### Test

```bash
# Use curl

# curl Llama Vision 11B Model Service
http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?", "max_new_tokens": 128}' -H 'Content-Type: application/json'

# curl Llama Vision Guard Model Service
http_proxy="" curl http://localhost:9499/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?", "max_new_tokens": 128}' -H 'Content-Type: application/json'

# curl Llama Vision 90B Model Service
http_proxy="" curl http://localhost:9599/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?", "max_new_tokens": 128}' -H 'Content-Type: application/json'

```
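
The inline base64 string in the calls above is a tiny sample image. As a hedged variant of the same requests (assuming a Linux host with GNU `base64` and an illustrative local file named `example.png`), a real image can be encoded and sent like this:

```bash
# Encode a local image and query the 11B service; switch the port to 9499
# for the guard service or 9599 for the 90B service.
image_b64=$(base64 -w 0 example.png)
http_proxy="" curl http://localhost:9399/v1/lvm -XPOST \
  -H 'Content-Type: application/json' \
  -d "{\"image\": \"${image_b64}\", \"prompt\": \"What is this?\", \"max_new_tokens\": 128}"
```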