diff --git a/.github/workflows/docker/compose/text2image-compose-cd.yaml b/.github/workflows/docker/compose/text2image-compose-cd.yaml new file mode 100644 index 000000000..d2737922f --- /dev/null +++ b/.github/workflows/docker/compose/text2image-compose-cd.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: text2image,text2image-gaudi +services: + text2image: + build: + dockerfile: comps/text2image/Dockerfile + image: ${REGISTRY:-opea}/text2image:${TAG:-latest} + text2image-gaudi: + build: + dockerfile: comps/text2image/Dockerfile.intel_hpu + image: ${REGISTRY:-opea}/text2image-gaudi:${TAG:-latest} diff --git a/comps/__init__.py b/comps/__init__.py index 2a65ce4fa..557b33f9e 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -26,6 +26,8 @@ ImagesPath, VideoPath, ImageDoc, + SDInputs, + SDOutputs, TextImageDoc, MultimodalDoc, EmbedMultimodalDoc, diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 1776e35ae..c26d59fba 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -264,6 +264,15 @@ class LVMVideoDoc(BaseDoc): max_new_tokens: conint(ge=0, le=1024) = 512 +class SDInputs(BaseDoc): + prompt: str + num_images_per_prompt: int = 1 + + +class SDOutputs(BaseDoc): + images: list + + class ImagePath(BaseDoc): image_path: str diff --git a/comps/text2image/Dockerfile b/comps/text2image/Dockerfile new file mode 100644 index 000000000..50dc6e3b7 --- /dev/null +++ b/comps/text2image/Dockerfile @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +ARG ARCH="cpu" + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + 
pip install --no-cache-dir -r /home/comps/text2image/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/text2image + +RUN echo python text2image.py --bf16 >> run.sh + +CMD bash run.sh \ No newline at end of file diff --git a/comps/text2image/Dockerfile.intel_hpu b/comps/text2image/Dockerfile.intel_hpu new file mode 100644 index 000000000..ffa228cf8 --- /dev/null +++ b/comps/text2image/Dockerfile.intel_hpu @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# HABANA environment +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN chown -R user /home/user/comps/text2image + +RUN rm -rf /etc/ssh/ssh_host* +USER user +# Set environment variables +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana + +# Install requirements and optimum habana +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/text2image/requirements.txt && \ + pip install --no-cache-dir optimum[habana] + +WORKDIR /home/user/comps/text2image + +RUN echo python text2image.py --device hpu --use_hpu_graphs --bf16 >> run.sh + +CMD bash run.sh \ No newline at end of file diff --git a/comps/text2image/README.md b/comps/text2image/README.md new file mode 100644 index 000000000..e4eadc3cf --- /dev/null +++ b/comps/text2image/README.md @@ -0,0 +1,97 @@ +# Text-to-Image Microservice + +Text-to-Image is a task that generates an image conditioned on the provided text. This microservice supports the text-to-image task using a Stable Diffusion (SD) model. + +# 🚀1. 
Start Microservice with Python (Option 1) + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Start Text-to-Image Microservice + +Select a Stable Diffusion (SD) model and assign its name to an environment variable as below: + +```bash +# SD1.5 +export MODEL=stable-diffusion-v1-5/stable-diffusion-v1-5 +# SD2.1 +export MODEL=stabilityai/stable-diffusion-2-1 +# SDXL +export MODEL=stabilityai/stable-diffusion-xl-base-1.0 +# SD3 +export MODEL=stabilityai/stable-diffusion-3-medium-diffusers +``` + +Set the Hugging Face token: + +```bash +export HF_TOKEN= +``` + +Start the OPEA Microservice: + +```bash +python text2image.py --bf16 --model_name_or_path $MODEL --token $HF_TOKEN +``` + +# 🚀2. Start Microservice with Docker (Option 2) + +## 2.1 Build Images + +Select a Stable Diffusion (SD) model and assign its name to an environment variable as below: + +```bash +# SD1.5 +export MODEL=stable-diffusion-v1-5/stable-diffusion-v1-5 +# SD2.1 +export MODEL=stabilityai/stable-diffusion-2-1 +# SDXL +export MODEL=stabilityai/stable-diffusion-xl-base-1.0 +# SD3 +export MODEL=stabilityai/stable-diffusion-3-medium-diffusers +``` + +### 2.1.1 Text-to-Image Service Image on Xeon + +Build text-to-image service image on Xeon with below command: + +```bash +cd ../.. +docker build -t opea/text2image:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile . +``` + +### 2.1.2 Text-to-Image Service Image on Gaudi + +Build text-to-image service image on Gaudi with below command: + +```bash +cd ../.. +docker build -t opea/text2image-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile.intel_hpu . 
+``` + +## 2.2 Start Text-to-Image Service + +### 2.2.1 Start Text-to-Image Service on Xeon + +Start text-to-image service on Xeon with below command: + +```bash +docker run --ipc=host -p 9379:9379 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=$MODEL opea/text2image:latest +``` + +### 2.2.2 Start Text-to-Image Service on Gaudi + +Start text-to-image service on Gaudi with below command: + +```bash +docker run -p 9379:9379 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=$MODEL opea/text2image-gaudi:latest +``` + +# 3 Test Text-to-Image Service + +```bash +http_proxy="" curl http://localhost:9379/v1/text2image -XPOST -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' -H 'Content-Type: application/json' +``` diff --git a/comps/text2image/__init__.py b/comps/text2image/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/text2image/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/text2image/requirements.txt b/comps/text2image/requirements.txt new file mode 100644 index 000000000..f639b4ba1 --- /dev/null +++ b/comps/text2image/requirements.txt @@ -0,0 +1,15 @@ +accelerate +datasets +diffusers +docarray[full] +fastapi +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +pydantic==2.7.2 +pydub +shortuuid +torch +transformers +uvicorn diff --git a/comps/text2image/text2image.py b/comps/text2image/text2image.py new file mode 100644 index 000000000..050df49d5 --- /dev/null +++ b/comps/text2image/text2image.py @@ -0,0 +1,123 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import base64 +import os +import threading +import time + +import 
logger = CustomLogger("text2image")

# Lazily-initialised module state. ``args`` is populated by the ``__main__``
# block below; ``pipe`` is built on the first request by ``initialize()``.
pipe = None
args = None
initialization_lock = threading.Lock()
initialized = False


def initialize():
    """Build the diffusion pipeline once and store it in the global ``pipe``.

    Configuration comes from the module-level ``args`` namespace, with two
    environment overrides: ``MODEL`` replaces ``args.model_name_or_path`` when
    set, and ``HF_TOKEN`` is used when no ``--token`` was supplied.

    The whole body runs under ``initialization_lock`` so that concurrent first
    requests load the model only once; later calls return immediately.

    Raises:
        RuntimeError: if ``args`` has not been populated (the module was
            imported instead of being run as a script).
        NotImplementedError: for a device other than ``cpu``/``hpu``, or for an
            HPU model name that is neither stable-diffusion nor flux.
    """
    global pipe, initialized
    with initialization_lock:
        if initialized:
            return
        if args is None:
            # Previously this surfaced as an opaque AttributeError on ``None``.
            raise RuntimeError("text2image arguments are not set; start the service via text2image.py.")

        model_override = os.getenv("MODEL", None)
        if model_override:
            args.model_name_or_path = model_override
        if not args.token:
            args.token = os.getenv("HF_TOKEN")

        kwargs = {}
        if args.bf16:
            kwargs["torch_dtype"] = torch.bfloat16

        if args.device == "hpu":
            kwargs.update(
                {
                    "use_habana": True,
                    "use_hpu_graphs": args.use_hpu_graphs,
                    "gaudi_config": "Habana/stable-diffusion",
                    "token": args.token,
                }
            )
            # Lower-case once so every family check is case-insensitive; the
            # SD3 check used to be case-sensitive, unlike the other two.
            model_id = args.model_name_or_path.lower()
            if "stable-diffusion-3" in model_id:
                from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline

                pipe = GaudiStableDiffusion3Pipeline.from_pretrained(
                    args.model_name_or_path,
                    **kwargs,
                )
            elif "stable-diffusion" in model_id or "flux" in model_id:
                from optimum.habana.diffusers import AutoPipelineForText2Image

                pipe = AutoPipelineForText2Image.from_pretrained(
                    args.model_name_or_path,
                    **kwargs,
                )
            else:
                raise NotImplementedError(
                    "Only support stable-diffusion, stable-diffusion-xl, stable-diffusion-3 and flux now, "
                    + f"model {args.model_name_or_path} not supported."
                )
        elif args.device == "cpu":
            pipe = DiffusionPipeline.from_pretrained(args.model_name_or_path, token=args.token, **kwargs)
        else:
            raise NotImplementedError(f"Only support cpu and hpu device now, device {args.device} not supported.")

        logger.info("Stable Diffusion model initialized.")
        initialized = True


def _encode_png_base64(image):
    """Return ``image`` serialised as PNG and base64-encoded into a ``str``.

    ``image`` only needs a ``save(fp, format=...)`` method (presumably a PIL
    image as returned by the pipeline -- confirm for custom pipelines).
    """
    import io  # function-scope import: the file's import block is not part of this edit

    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode()


@register_microservice(
    name="opea_service@text2image",
    service_type=ServiceType.TEXT2IMAGE,
    endpoint="/v1/text2image",
    host="0.0.0.0",
    port=9379,
    input_datatype=SDInputs,
    output_datatype=SDOutputs,
)
@register_statistics(names=["opea_service@text2image"])
def text2image(input: SDInputs):
    """Generate images for ``input.prompt`` and return them base64-encoded.

    Args:
        input: request carrying ``prompt`` and ``num_images_per_prompt``.

    Returns:
        ``SDOutputs`` whose ``images`` list holds one base64 PNG string per
        generated image.

    The images are encoded in memory. Earlier code wrote them to a directory
    named after the prompt under the working directory and read them back;
    because the prompt is client-controlled, that allowed escaping the working
    directory (e.g. a prompt of ``..``), failed on over-long prompts, let
    concurrent requests with the same prompt overwrite each other, and left
    files on disk indefinitely.
    """
    initialize()
    start = time.time()

    # Re-seed on every request with the configured seed so results are
    # reproducible across calls.
    generator = torch.manual_seed(args.seed)
    images = pipe(
        input.prompt,
        generator=generator,
        num_images_per_prompt=input.num_images_per_prompt,
    ).images

    results = [_encode_png_base64(image) for image in images]

    statistics_dict["opea_service@text2image"].append_latency(time.time() - start, None)
    return SDOutputs(images=results)


if __name__ == "__main__":
    # Command-line configuration; the parsed namespace becomes the module-level
    # ``args`` that ``initialize()`` and ``text2image()`` read.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name_or_path", type=str, default="stabilityai/stable-diffusion-3-medium-diffusers")
    parser.add_argument("--use_hpu_graphs", default=False, action="store_true")
    parser.add_argument("--device", type=str, default="cpu")
    parser.add_argument("--token", type=str, default=None)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--bf16", action="store_true")

    args = parser.parse_args()

    logger.info("Text2image server started.")
    opea_microservices["opea_service@text2image"].start()
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Smoke test for the text2image microservice: build the CPU image, start a
# container, send one generation request, then clean up.

set -x

# Parent of the directory the script is launched from; the docker build below
# uses it as the build context.
WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')

# Build opea/text2image:latest from $WORKPATH; abort the run if the build fails.
function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    if docker build --no-cache -t opea/text2image:latest -f comps/text2image/Dockerfile .; then
        echo "opea/text2image built successful"
    else
        echo "opea/text2image built fail"
        exit 1
    fi
}

# Start the service container in the background and give it time to come up.
function start_service() {
    unset http_proxy
    docker run -d \
        --name="test-comps-text2image" \
        -e http_proxy=$http_proxy \
        -e https_proxy=$https_proxy \
        -e MODEL=stabilityai/stable-diffusion-xl-base-1.0 \
        -p 9379:9379 \
        --ipc=host \
        opea/text2image:latest
    sleep 30s
}

# POST one prompt and check that the response mentions "images"; on failure
# dump the container logs and exit non-zero.
function validate_microservice() {
    result=$(http_proxy="" curl http://localhost:9379/v1/text2image -XPOST -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' -H 'Content-Type: application/json')
    if [[ $result != *"images"* ]]; then
        echo "Result wrong."
        docker logs test-comps-text2image
        exit 1
    fi
    echo "Result correct."
}

# Stop and remove any container left over from this test (running or exited).
function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-text2image*")
    # $cid is intentionally unquoted below so several ids split into separate arguments.
    if [[ -n "$cid" ]]; then
        docker stop $cid && docker rm $cid && sleep 1s
    fi
}

function main() {
    # Clear leftovers from a previous run before building.
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main