From 89a22571f1b021e4b63a82b8fed0a7c7805e72d7 Mon Sep 17 00:00:00 2001 From: "Ye, Xinyu" Date: Mon, 23 Sep 2024 04:26:54 -0400 Subject: [PATCH 1/7] add stable diffusion microservice. Signed-off-by: Ye, Xinyu --- comps/__init__.py | 1 + comps/cores/proto/docarray.py | 5 + comps/text2image/Dockerfile | 18 ++++ comps/text2image/README.md | 91 +++++++++++++++++++ comps/text2image/__init__.py | 2 + comps/text2image/dependency/Dockerfile | 25 +++++ .../dependency/Dockerfile.intel_hpu | 34 +++++++ comps/text2image/dependency/requirements.txt | 6 ++ comps/text2image/dependency/sd_server.py | 90 ++++++++++++++++++ comps/text2image/requirements.txt | 11 +++ comps/text2image/text2image.py | 46 ++++++++++ 11 files changed, 329 insertions(+) create mode 100644 comps/text2image/Dockerfile create mode 100644 comps/text2image/README.md create mode 100644 comps/text2image/__init__.py create mode 100644 comps/text2image/dependency/Dockerfile create mode 100644 comps/text2image/dependency/Dockerfile.intel_hpu create mode 100644 comps/text2image/dependency/requirements.txt create mode 100644 comps/text2image/dependency/sd_server.py create mode 100644 comps/text2image/requirements.txt create mode 100644 comps/text2image/text2image.py diff --git a/comps/__init__.py b/comps/__init__.py index 2a65ce4fa..42d23b11a 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -26,6 +26,7 @@ ImagesPath, VideoPath, ImageDoc, + SDInputs, TextImageDoc, MultimodalDoc, EmbedMultimodalDoc, diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 1776e35ae..cc27741d4 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -264,6 +264,11 @@ class LVMVideoDoc(BaseDoc): max_new_tokens: conint(ge=0, le=1024) = 512 +class SDInputs(BaseDoc): + prompt: str + num_images_per_prompt: int = 1 + + class ImagePath(BaseDoc): image_path: str diff --git a/comps/text2image/Dockerfile b/comps/text2image/Dockerfile new file mode 100644 index 000000000..ce179338a --- /dev/null +++ b/comps/text2image/Dockerfile @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/text2image/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/text2image + +ENTRYPOINT ["python", "text2image.py"] \ No newline at end of file diff --git a/comps/text2image/README.md b/comps/text2image/README.md new file mode 100644 index 000000000..58e4fd04b --- /dev/null +++ b/comps/text2image/README.md @@ -0,0 +1,91 @@ +# Text-to-Image Microservice + +Text-to-Image is a task that generate image conditioning on the provided text. This microservice supports text-to-image task by using Stable Diffusion (SD) model. + +# 🚀1. Start Microservice with Python (Option 1) + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +pip install -r dependency/requirements.txt +``` + +## 1.2 Start SD Service + +```bash +# Start SD service +cd dependency/ +python sd_server.py --token $HF_TOKEN +``` + +## 1.3 Start Text-to-Image Microservice + +```bash +cd .. +# Start the OPEA Microservice +python text2image.py +``` + +# 🚀2. 
Start Microservice with Docker (Option 2) + +## 2.1 Build Images + +Select Stable Diffusion (SD) model and assign its name to a enviroment variable as below: +```bash +# SD3 +export MODEL=stabilityai/stable-diffusion-3-medium-diffusers +# SDXL +export MODEL=stabilityai/stable-diffusion-xl-base-1.0 +``` + +### 2.1.1 SD Server Image + +Build SD server image on Xeon with below command: + +```bash +cd ../.. +docker build -t opea/sd:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg MODEL=$MODEL -f comps/text2image/dependency/Dockerfile . +``` + +Build SD server image on Gaudi with below command: + +```bash +cd ../.. +docker build -t opea/sd-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg MODEL=$MODEL -f comps/text2image/dependency/Dockerfile.intel_hpu . +``` + +### 2.1.2 Text-to-Image Service Image + +```bash +docker build -t opea/text2image:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile . +``` + +## 2.2 Start SD and Text-to-Image Service + +### 2.2.1 Start SD server + +Start SD server on Xeon with below command: + +```bash +docker run --ipc=host -p 9378:9378 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN opea/sd:latest +``` + +Start SD server on Gaudi with below command: + +```bash +docker run -p 9378:9378 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN opea/sd-gaudi:latest +``` + +### 2.2.2 Start Text-to-Image service + +```bash +ip_address=$(hostname -I | awk '{print $1}') +docker run -p 9379:9379 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e SD_ENDPOINT=http://$ip_address:9378 opea/text2image:latest +``` + +### 2.2.3 Test + +```bash +http_proxy="" curl http://localhost:9379/v1/text2image -XPOST -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' -H 'Content-Type: application/json' +``` diff --git a/comps/text2image/__init__.py b/comps/text2image/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/text2image/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/text2image/dependency/Dockerfile b/comps/text2image/dependency/Dockerfile new file mode 100644 index 000000000..bd64c1d18 --- /dev/null +++ b/comps/text2image/dependency/Dockerfile @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +ARG ARCH="cpu" + +ARG MODEL + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/comps/text2image/dependency/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/text2image/sd + +RUN echo python sd_server.py --model_name_or_path $MODEL >> run.sh + +CMD bash run.sh \ No newline at end of file diff --git a/comps/text2image/dependency/Dockerfile.intel_hpu b/comps/text2image/dependency/Dockerfile.intel_hpu new file mode 100644 index 000000000..1e1da0803 --- /dev/null +++ b/comps/text2image/dependency/Dockerfile.intel_hpu @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: 
Apache-2.0 + +# HABANA environment +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN chown -R user /home/user/comps/text2image + +RUN rm -rf /etc/ssh/ssh_host* +USER user +# Set environment variables +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana + +ARG MODEL + +# Install requirements and optimum habana +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/text2image/dependency/requirements.txt && \ + pip install --no-cache-dir optimum[habana] + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/text2image/dependency + +RUN echo python sd_server.py --device hpu --bf16 --model_name_or_path $MODEL >> run.sh + +CMD bash run.sh \ No newline at end of file diff --git a/comps/text2image/dependency/requirements.txt b/comps/text2image/dependency/requirements.txt new file mode 100644 index 000000000..ad8f5e96d --- /dev/null +++ b/comps/text2image/dependency/requirements.txt @@ -0,0 +1,6 @@ +accelerate +diffusers +fastapi +torch +transformers +uvicorn diff --git a/comps/text2image/dependency/sd_server.py b/comps/text2image/dependency/sd_server.py new file mode 100644 index 000000000..692d2eaf4 --- /dev/null +++ b/comps/text2image/dependency/sd_server.py @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Stand-alone Stable Diffusion FastAPI Server.""" + +import argparse +import os +import time + +import torch +import uvicorn +from diffusers import DiffusionPipeline +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, Response + +app = FastAPI() + + +@app.post("/generate") +async def generate(request: Request) -> Response: + print("SD generation begin.") + request_dict = await request.json() + prompt = request_dict.pop("prompt") + num_images_per_prompt = request_dict.pop("num_images_per_prompt", 1) + + start = time.time() + generator = torch.manual_seed(args.seed) + images = pipe(prompt, generator=generator, num_images_per_prompt=num_images_per_prompt).images + image_path = os.path.join(os.getcwd(), prompt.strip().replace(" ", "_").replace("/", "")) + os.makedirs(image_path, exist_ok=True) + for i, image in enumerate(images): + image.save(os.path.join(image_path, f"image_{i+1}.png")) + end = time.time() + print(f"SD Images output in {image_path}, time = {end-start}s") + return JSONResponse({"image_path": image_path}) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=9378) + parser.add_argument("--model_name_or_path", type=str, default="stabilityai/stable-diffusion-3-medium-diffusers") + parser.add_argument("--use_hpu_graphs", default=False, action="store_true") + parser.add_argument("--device", type=str, default="cpu") + parser.add_argument("--token", type=str, default=None) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--bf16", action="store_true") + + args = parser.parse_args() + if not args.token: + args.token = os.getenv("HF_TOKEN") + if args.device == "hpu": + kwargs = { + "use_habana": True, + "use_hpu_graphs": args.use_hpu_graphs, + "gaudi_config": "Habana/stable-diffusion", + "token": args.token + } + if args.bf16: + 
kwargs["torch_dtype"] = torch.bfloat16 + if "stable-diffusion-3" in args.model_name_or_path: + from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline + + pipe = GaudiStableDiffusion3Pipeline.from_pretrained( + args.model_name_or_path, + **kwargs, + ) + elif "stable-diffusion-xl" in args.model_name_or_path: + from optimum.habana.diffusers import GaudiStableDiffusionXLPipeline + + pipe = GaudiStableDiffusionXLPipeline.from_pretrained( + args.model_name_or_path, + **kwargs, + ) + else: + raise NotImplementedError( + "Only support stable-diffusion-3 and stable-diffusion-xl now, " + \ + f"model {args.model_name_or_path} not supported." + ) + elif args.device == "cpu": + pipe = DiffusionPipeline.from_pretrained(args.model_name_or_path, token=args.token) + else: + raise NotImplementedError(f"Only support cpu and hpu device now, device {args.device} not supported.") + print("Stable Diffusion model initialized.") + + uvicorn.run( + app, + host=args.host, + port=args.port, + log_level="debug", + ) diff --git a/comps/text2image/requirements.txt b/comps/text2image/requirements.txt new file mode 100644 index 000000000..069279834 --- /dev/null +++ b/comps/text2image/requirements.txt @@ -0,0 +1,11 @@ +datasets +docarray[full] +fastapi +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +pydantic==2.7.2 +pydub +shortuuid +uvicorn diff --git a/comps/text2image/text2image.py b/comps/text2image/text2image.py new file mode 100644 index 000000000..5154ff76f --- /dev/null +++ b/comps/text2image/text2image.py @@ -0,0 +1,46 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import json +import os +import time + +import requests + +from comps import ( + SDInputs, + ServiceType, + ImagePath, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + + +@register_microservice( + name="opea_service@text2image", + service_type=ServiceType.TEXT2IMAGE, + endpoint="/v1/text2image", + host="0.0.0.0", + port=9379, + input_datatype=SDInputs, + output_datatype=ImagePath, +) +@register_statistics(names=["opea_service@text2image"]) +async def text2image(input: SDInputs): + start = time.time() + inputs = {"prompt": input.prompt, "num_images_per_prompt": input.num_images_per_prompt} + image_path = requests.post(url=f"{sd_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}).json()[ + "image_path" + ] + + statistics_dict["opea_service@text2image"].append_latency(time.time() - start, None) + return ImagePath(image_path=image_path) + + +if __name__ == "__main__": + sd_endpoint = os.getenv("SD_ENDPOINT", "http://localhost:9378") + print("Text2image server started.") + opea_microservices["opea_service@text2image"].start() From f4f77916aab642e32d4c827712f078049a0cfe41 Mon Sep 17 00:00:00 2001 From: "Ye, Xinyu" Date: Mon, 23 Sep 2024 22:40:56 -0400 Subject: [PATCH 2/7] added test. 
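For reference, the endpoint added in PATCH 1 can be exercised with a minimal Python client along these lines — a sketch, assuming the wrapper is reachable on localhost:9379; note that at this point in the series the service returns a server-side image_path, which PATCH 3 below changes to base64-encoded image bytes:

```python
# Minimal client sketch for the PATCH 1 API (hypothetical usage, not part of the patch).
import requests

payload = {"prompt": "An astronaut riding a green horse", "num_images_per_prompt": 1}
resp = requests.post(
    "http://localhost:9379/v1/text2image",
    json=payload,
    proxies={"http": None},  # bypass local proxies, as the test scripts do
    timeout=600,             # CPU generation can take several minutes
)
resp.raise_for_status()
# At this stage of the series the response carries the server-side directory
# where the generated PNGs were written.
print(resp.json()["image_path"])
```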
Signed-off-by: Ye, Xinyu --- .../docker/compose/text2image-compose-cd.yaml | 18 +++++ comps/text2image/dependency/Dockerfile | 2 +- tests/{ => image2video}/test_image2video.sh | 0 tests/text2image/test_text2image.sh | 68 +++++++++++++++++++ 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/docker/compose/text2image-compose-cd.yaml rename tests/{ => image2video}/test_image2video.sh (100%) create mode 100644 tests/text2image/test_text2image.sh diff --git a/.github/workflows/docker/compose/text2image-compose-cd.yaml b/.github/workflows/docker/compose/text2image-compose-cd.yaml new file mode 100644 index 000000000..1382fede2 --- /dev/null +++ b/.github/workflows/docker/compose/text2image-compose-cd.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: text2image,sd,sd-gaudi +services: + text2image: + build: + dockerfile: comps/text2image/Dockerfile + image: ${REGISTRY:-opea}/text2image:${TAG:-latest} + sd: + build: + dockerfile: comps/text2image/dependency/Dockerfile + image: ${REGISTRY:-opea}/sd:${TAG:-latest} + sd-gaudi: + build: + dockerfile: comps/text2image/dependency/Dockerfile.intel_hpu + image: ${REGISTRY:-opea}/sd-gaudi:${TAG:-latest} diff --git a/comps/text2image/dependency/Dockerfile b/comps/text2image/dependency/Dockerfile index bd64c1d18..7732d0291 100644 --- a/comps/text2image/dependency/Dockerfile +++ b/comps/text2image/dependency/Dockerfile @@ -18,7 +18,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home -WORKDIR /home/comps/text2image/sd +WORKDIR /home/comps/text2image/dependency RUN echo python sd_server.py --model_name_or_path $MODEL >> run.sh diff --git a/tests/test_image2video.sh b/tests/image2video/test_image2video.sh similarity index 100% rename from tests/test_image2video.sh rename to tests/image2video/test_image2video.sh diff --git a/tests/text2image/test_text2image.sh b/tests/text2image/test_text2image.sh new file mode 100644 index 000000000..04f401d3a --- /dev/null +++ b/tests/text2image/test_text2image.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/sd:latest --build-arg MODEL=stabilityai/stable-diffusion-xl-base-1.0 -f comps/text2image/dependency/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/sd built fail" + exit 1 + else + echo "opea/sd built successful" + fi + docker build --no-cache -t opea/text2image:latest -f comps/text2image/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/text2image built fail" + exit 1 + else + echo "opea/text2image built successful" + fi +} + +function start_service() { + unset http_proxy + docker run -d --name="test-comps-text2image-sd" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9378:9378 --ipc=host opea/sd:latest + docker run -d --name="test-comps-text2image" -e SD_ENDPOINT=http://$ip_address:9378 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9379:9379 --ipc=host opea/text2image:latest + sleep 3m +} + +function validate_microservice() { + result=$(http_proxy="" curl http://localhost:9379/v1/text2image -XPOST -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' -H 'Content-Type: application/json') + if [[ $result == *"green_horse"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs test-comps-text2image-sd + docker logs test-comps-text2image + exit 1 + fi + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-text2image*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main From 911698540441fd4b8798788c8663d345039426cf Mon Sep 17 00:00:00 2001 From: "Ye, Xinyu" Date: Tue, 24 Sep 2024 03:56:17 -0400 Subject: [PATCH 3/7] changed output to images bytes data Signed-off-by: Ye, Xinyu --- comps/__init__.py | 1 + comps/cores/proto/docarray.py | 4 ++++ comps/text2image/dependency/sd_server.py | 11 +++++++++-- comps/text2image/text2image.py | 10 +++++----- tests/text2image/test_text2image.sh | 2 +- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/comps/__init__.py b/comps/__init__.py index 42d23b11a..557b33f9e 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -27,6 +27,7 @@ VideoPath, ImageDoc, SDInputs, + SDOutputs, TextImageDoc, MultimodalDoc, EmbedMultimodalDoc, diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index cc27741d4..c26d59fba 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -269,6 +269,10 @@ class SDInputs(BaseDoc): num_images_per_prompt: int = 1 +class SDOutputs(BaseDoc): + images: list + + class ImagePath(BaseDoc): image_path: str diff --git a/comps/text2image/dependency/sd_server.py b/comps/text2image/dependency/sd_server.py index 692d2eaf4..92926553f 100644 --- a/comps/text2image/dependency/sd_server.py +++ b/comps/text2image/dependency/sd_server.py @@ -3,6 +3,7 @@ """Stand-alone Stable Diffusion FastAPI Server.""" import argparse +import base64 import os import time @@ -27,11 +28,17 @@ async def generate(request: Request) -> Response: images = pipe(prompt, generator=generator, num_images_per_prompt=num_images_per_prompt).images image_path = os.path.join(os.getcwd(), prompt.strip().replace(" ", "_").replace("/", "")) os.makedirs(image_path, exist_ok=True) + results = [] for i, image in enumerate(images): - image.save(os.path.join(image_path, f"image_{i+1}.png")) + save_path = os.path.join(image_path, f"image_{i+1}.png") + image.save(save_path) + with open(save_path, "rb") as f: + bytes = f.read() + b64_str = base64.b64encode(bytes).decode() + results.append(b64_str) end = time.time() print(f"SD Images output in {image_path}, time = {end-start}s") - return JSONResponse({"image_path": image_path}) + return JSONResponse({"images": results}) if __name__ == "__main__": diff --git a/comps/text2image/text2image.py 
b/comps/text2image/text2image.py index 5154ff76f..659653bff 100644 --- a/comps/text2image/text2image.py +++ b/comps/text2image/text2image.py @@ -10,8 +10,8 @@ from comps import ( SDInputs, + SDOutputs, ServiceType, - ImagePath, opea_microservices, register_microservice, register_statistics, @@ -26,18 +26,18 @@ host="0.0.0.0", port=9379, input_datatype=SDInputs, - output_datatype=ImagePath, + output_datatype=SDOutputs, ) @register_statistics(names=["opea_service@text2image"]) async def text2image(input: SDInputs): start = time.time() inputs = {"prompt": input.prompt, "num_images_per_prompt": input.num_images_per_prompt} - image_path = requests.post(url=f"{sd_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}).json()[ - "image_path" + images = requests.post(url=f"{sd_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}).json()[ + "images" ] statistics_dict["opea_service@text2image"].append_latency(time.time() - start, None) - return ImagePath(image_path=image_path) + return SDOutputs(images=images) if __name__ == "__main__": diff --git a/tests/text2image/test_text2image.sh b/tests/text2image/test_text2image.sh index 04f401d3a..bf77812b6 100644 --- a/tests/text2image/test_text2image.sh +++ b/tests/text2image/test_text2image.sh @@ -35,7 +35,7 @@ function start_service() { function validate_microservice() { result=$(http_proxy="" curl http://localhost:9379/v1/text2image -XPOST -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' -H 'Content-Type: application/json') - if [[ $result == *"green_horse"* ]]; then + if [[ $result == *"images"* ]]; then echo "Result correct." else echo "Result wrong." From bd39cd0adff3d7b269ca6d95c41cf068f1705c8c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 09:05:27 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/text2image/README.md | 3 ++- comps/text2image/dependency/sd_server.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/comps/text2image/README.md b/comps/text2image/README.md index 58e4fd04b..8bf1c492b 100644 --- a/comps/text2image/README.md +++ b/comps/text2image/README.md @@ -31,7 +31,8 @@ python text2image.py ## 2.1 Build Images -Select Stable Diffusion (SD) model and assign its name to a enviroment variable as below: +Select Stable Diffusion (SD) model and assign its name to a environment variable as below: + ```bash # SD3 export MODEL=stabilityai/stable-diffusion-3-medium-diffusers diff --git a/comps/text2image/dependency/sd_server.py b/comps/text2image/dependency/sd_server.py index 92926553f..db076ff36 100644 --- a/comps/text2image/dependency/sd_server.py +++ b/comps/text2image/dependency/sd_server.py @@ -60,7 +60,7 @@ async def generate(request: Request) -> Response: "use_habana": True, "use_hpu_graphs": args.use_hpu_graphs, "gaudi_config": "Habana/stable-diffusion", - "token": args.token + "token": args.token, } if args.bf16: kwargs["torch_dtype"] = torch.bfloat16 @@ -80,8 +80,8 @@ async def generate(request: Request) -> Response: ) else: raise NotImplementedError( - "Only support stable-diffusion-3 and stable-diffusion-xl now, " + \ - f"model {args.model_name_or_path} not supported." + "Only support stable-diffusion-3 and stable-diffusion-xl now, " + + f"model {args.model_name_or_path} not supported." 
) elif args.device == "cpu": pipe = DiffusionPipeline.from_pretrained(args.model_name_or_path, token=args.token) From 045a11bdb6b4fd96a74bb371161e926d5a3395de Mon Sep 17 00:00:00 2001 From: "Ye, Xinyu" Date: Fri, 27 Sep 2024 03:27:09 -0400 Subject: [PATCH 5/7] unified inference and wrapper into one microservice. Signed-off-by: Ye, Xinyu --- .../docker/compose/text2image-compose-cd.yaml | 12 +-- comps/text2image/Dockerfile | 7 +- .../{dependency => }/Dockerfile.intel_hpu | 12 +-- comps/text2image/README.md | 75 +++++++------- comps/text2image/dependency/Dockerfile | 25 ----- comps/text2image/dependency/requirements.txt | 6 -- comps/text2image/dependency/sd_server.py | 97 ------------------ comps/text2image/requirements.txt | 6 +- comps/text2image/text2image.py | 98 ++++++++++++++++--- tests/text2image/test_text2image.sh | 15 +-- 10 files changed, 149 insertions(+), 204 deletions(-) rename comps/text2image/{dependency => }/Dockerfile.intel_hpu (66%) delete mode 100644 comps/text2image/dependency/Dockerfile delete mode 100644 comps/text2image/dependency/requirements.txt delete mode 100644 comps/text2image/dependency/sd_server.py diff --git a/.github/workflows/docker/compose/text2image-compose-cd.yaml b/.github/workflows/docker/compose/text2image-compose-cd.yaml index 1382fede2..d2737922f 100644 --- a/.github/workflows/docker/compose/text2image-compose-cd.yaml +++ b/.github/workflows/docker/compose/text2image-compose-cd.yaml @@ -2,17 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 # this file should be run in the root of the repo -# images used by GenAIExamples: text2image,sd,sd-gaudi +# images used by GenAIExamples: text2image,text2image-gaudi services: text2image: build: dockerfile: comps/text2image/Dockerfile image: ${REGISTRY:-opea}/text2image:${TAG:-latest} - sd: + text2image-gaudi: build: - dockerfile: comps/text2image/dependency/Dockerfile - image: ${REGISTRY:-opea}/sd:${TAG:-latest} - sd-gaudi: - build: - dockerfile: comps/text2image/dependency/Dockerfile.intel_hpu - image: ${REGISTRY:-opea}/sd-gaudi:${TAG:-latest} + dockerfile: comps/text2image/Dockerfile.intel_hpu + image: ${REGISTRY:-opea}/text2image-gaudi:${TAG:-latest} diff --git a/comps/text2image/Dockerfile b/comps/text2image/Dockerfile index ce179338a..50dc6e3b7 100644 --- a/comps/text2image/Dockerfile +++ b/comps/text2image/Dockerfile @@ -6,13 +6,18 @@ FROM python:3.11-slim # Set environment variables ENV LANG=en_US.UTF-8 +ARG ARCH="cpu" + COPY comps /home/comps RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/comps/text2image/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home WORKDIR /home/comps/text2image -ENTRYPOINT ["python", "text2image.py"] \ No newline at end of file +RUN echo python text2image.py --bf16 >> run.sh + +CMD bash run.sh \ No newline at end of file diff --git a/comps/text2image/dependency/Dockerfile.intel_hpu b/comps/text2image/Dockerfile.intel_hpu similarity index 66% rename from comps/text2image/dependency/Dockerfile.intel_hpu rename to comps/text2image/Dockerfile.intel_hpu index 1e1da0803..ffa228cf8 100644 --- a/comps/text2image/dependency/Dockerfile.intel_hpu +++ b/comps/text2image/Dockerfile.intel_hpu @@ -16,19 +16,15 @@ RUN rm -rf /etc/ssh/ssh_host* USER user # Set environment variables ENV LANG=en_US.UTF-8 -ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana - -ARG MODEL +ENV 
PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/text2image/dependency/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/text2image/requirements.txt && \ pip install --no-cache-dir optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/text2image/dependency +WORKDIR /home/user/comps/text2image -RUN echo python sd_server.py --device hpu --bf16 --model_name_or_path $MODEL >> run.sh +RUN echo python text2image.py --device hpu --use_hpu_graphs --bf16 >> run.sh CMD bash run.sh \ No newline at end of file diff --git a/comps/text2image/README.md b/comps/text2image/README.md index 8bf1c492b..e4eadc3cf 100644 --- a/comps/text2image/README.md +++ b/comps/text2image/README.md @@ -8,23 +8,33 @@ Text-to-Image is a task that generate image conditioning on the provided text. T ```bash pip install -r requirements.txt -pip install -r dependency/requirements.txt ``` -## 1.2 Start SD Service +## 1.2 Start Text-to-Image Microservice + +Select Stable Diffusion (SD) model and assign its name to a environment variable as below: + +```bash +# SD1.5 +export MODEL=stable-diffusion-v1-5/stable-diffusion-v1-5 +# SD2.1 +export MODEL=stabilityai/stable-diffusion-2-1 +# SDXL +export MODEL=stabilityai/stable-diffusion-xl-base-1.0 +# SD3 +export MODEL=stabilityai/stable-diffusion-3-medium-diffusers +``` + +Set huggingface token: ```bash -# Start SD service -cd dependency/ -python sd_server.py --token $HF_TOKEN +export HF_TOKEN= ``` -## 1.3 Start Text-to-Image Microservice +Start the OPEA Microservice: ```bash -cd .. -# Start the OPEA Microservice -python text2image.py +python text2image.py --bf16 --model_name_or_path $MODEL --token $HF_TOKEN ``` # 🚀2. Start Microservice with Docker (Option 2) @@ -34,58 +44,53 @@ python text2image.py Select Stable Diffusion (SD) model and assign its name to a environment variable as below: ```bash -# SD3 -export MODEL=stabilityai/stable-diffusion-3-medium-diffusers +# SD1.5 +export MODEL=stable-diffusion-v1-5/stable-diffusion-v1-5 +# SD2.1 +export MODEL=stabilityai/stable-diffusion-2-1 # SDXL export MODEL=stabilityai/stable-diffusion-xl-base-1.0 +# SD3 +export MODEL=stabilityai/stable-diffusion-3-medium-diffusers ``` -### 2.1.1 SD Server Image +### 2.1.1 Text-to-Image Service Image on Xeon -Build SD server image on Xeon with below command: +Build text-to-image service image on Xeon with below command: ```bash cd ../.. -docker build -t opea/sd:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg MODEL=$MODEL -f comps/text2image/dependency/Dockerfile . +docker build -t opea/text2image:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile . ``` -Build SD server image on Gaudi with below command: - -```bash -cd ../.. -docker build -t opea/sd-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg MODEL=$MODEL -f comps/text2image/dependency/Dockerfile.intel_hpu . -``` +### 2.1.2 Text-to-Image Service Image on Gaudi -### 2.1.2 Text-to-Image Service Image +Build text-to-image service image on Gaudi with below command: ```bash -docker build -t opea/text2image:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile . +cd ../.. 
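+# Dockerfile.intel_hpu starts from the Habana PyTorch base image and installs
+# optimum[habana]; the SD model is selected at container start via $MODEL.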
+docker build -t opea/text2image-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile.intel_hpu . ``` -## 2.2 Start SD and Text-to-Image Service +## 2.2 Start Text-to-Image Service -### 2.2.1 Start SD server +### 2.2.1 Start Text-to-Image Service on Xeon -Start SD server on Xeon with below command: +Start text-to-image service on Xeon with below command: ```bash -docker run --ipc=host -p 9378:9378 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN opea/sd:latest +docker run --ipc=host -p 9379:9379 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=$MODEL opea/text2image:latest ``` -Start SD server on Gaudi with below command: - -```bash -docker run -p 9378:9378 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN opea/sd-gaudi:latest -``` +### 2.2.2 Start Text-to-Image Service on Gaudi -### 2.2.2 Start Text-to-Image service +Start text-to-image service on Gaudi with below command: ```bash -ip_address=$(hostname -I | awk '{print $1}') -docker run -p 9379:9379 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e SD_ENDPOINT=http://$ip_address:9378 opea/text2image:latest +docker run -p 9379:9379 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=$MODEL opea/text2image-gaudi:latest ``` -### 2.2.3 Test +# 3 Test Text-to-Image Service ```bash http_proxy="" curl http://localhost:9379/v1/text2image -XPOST -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' -H 'Content-Type: application/json' diff --git a/comps/text2image/dependency/Dockerfile b/comps/text2image/dependency/Dockerfile deleted file mode 100644 index 7732d0291..000000000 --- a/comps/text2image/dependency/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -# Set environment variables -ENV LANG=en_US.UTF-8 - -ARG ARCH="cpu" - -ARG MODEL - -COPY comps /home/comps - -RUN pip install --no-cache-dir --upgrade pip && \ - if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/comps/text2image/dependency/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home - -WORKDIR /home/comps/text2image/dependency - -RUN echo python sd_server.py --model_name_or_path $MODEL >> run.sh - -CMD bash run.sh \ No newline at end of file diff --git a/comps/text2image/dependency/requirements.txt b/comps/text2image/dependency/requirements.txt deleted file mode 100644 index ad8f5e96d..000000000 --- a/comps/text2image/dependency/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -accelerate -diffusers -fastapi -torch -transformers -uvicorn diff --git a/comps/text2image/dependency/sd_server.py b/comps/text2image/dependency/sd_server.py deleted file mode 100644 index db076ff36..000000000 --- a/comps/text2image/dependency/sd_server.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Stand-alone Stable Diffusion FastAPI Server.""" - -import argparse -import base64 -import os -import time - -import torch -import uvicorn -from diffusers import 
DiffusionPipeline -from fastapi import FastAPI, Request -from fastapi.responses import JSONResponse, Response - -app = FastAPI() - - -@app.post("/generate") -async def generate(request: Request) -> Response: - print("SD generation begin.") - request_dict = await request.json() - prompt = request_dict.pop("prompt") - num_images_per_prompt = request_dict.pop("num_images_per_prompt", 1) - - start = time.time() - generator = torch.manual_seed(args.seed) - images = pipe(prompt, generator=generator, num_images_per_prompt=num_images_per_prompt).images - image_path = os.path.join(os.getcwd(), prompt.strip().replace(" ", "_").replace("/", "")) - os.makedirs(image_path, exist_ok=True) - results = [] - for i, image in enumerate(images): - save_path = os.path.join(image_path, f"image_{i+1}.png") - image.save(save_path) - with open(save_path, "rb") as f: - bytes = f.read() - b64_str = base64.b64encode(bytes).decode() - results.append(b64_str) - end = time.time() - print(f"SD Images output in {image_path}, time = {end-start}s") - return JSONResponse({"images": results}) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--host", type=str, default="0.0.0.0") - parser.add_argument("--port", type=int, default=9378) - parser.add_argument("--model_name_or_path", type=str, default="stabilityai/stable-diffusion-3-medium-diffusers") - parser.add_argument("--use_hpu_graphs", default=False, action="store_true") - parser.add_argument("--device", type=str, default="cpu") - parser.add_argument("--token", type=str, default=None) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--bf16", action="store_true") - - args = parser.parse_args() - if not args.token: - args.token = os.getenv("HF_TOKEN") - if args.device == "hpu": - kwargs = { - "use_habana": True, - "use_hpu_graphs": args.use_hpu_graphs, - "gaudi_config": "Habana/stable-diffusion", - "token": args.token, - } - if args.bf16: - kwargs["torch_dtype"] = torch.bfloat16 - if "stable-diffusion-3" in args.model_name_or_path: - from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline - - pipe = GaudiStableDiffusion3Pipeline.from_pretrained( - args.model_name_or_path, - **kwargs, - ) - elif "stable-diffusion-xl" in args.model_name_or_path: - from optimum.habana.diffusers import GaudiStableDiffusionXLPipeline - - pipe = GaudiStableDiffusionXLPipeline.from_pretrained( - args.model_name_or_path, - **kwargs, - ) - else: - raise NotImplementedError( - "Only support stable-diffusion-3 and stable-diffusion-xl now, " - + f"model {args.model_name_or_path} not supported." 
- ) - elif args.device == "cpu": - pipe = DiffusionPipeline.from_pretrained(args.model_name_or_path, token=args.token) - else: - raise NotImplementedError(f"Only support cpu and hpu device now, device {args.device} not supported.") - print("Stable Diffusion model initialized.") - - uvicorn.run( - app, - host=args.host, - port=args.port, - log_level="debug", - ) diff --git a/comps/text2image/requirements.txt b/comps/text2image/requirements.txt index 069279834..b1fe57505 100644 --- a/comps/text2image/requirements.txt +++ b/comps/text2image/requirements.txt @@ -1,4 +1,6 @@ +accelerate datasets +diffusers docarray[full] fastapi opentelemetry-api @@ -8,4 +10,6 @@ prometheus-fastapi-instrumentator pydantic==2.7.2 pydub shortuuid -uvicorn +torch +transformers +uvicorn \ No newline at end of file diff --git a/comps/text2image/text2image.py b/comps/text2image/text2image.py index 659653bff..a5b44b47b 100644 --- a/comps/text2image/text2image.py +++ b/comps/text2image/text2image.py @@ -1,14 +1,17 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - -import json +import argparse +import base64 import os +import threading import time -import requests +import torch +from diffusers import DiffusionPipeline from comps import ( + CustomLogger, SDInputs, SDOutputs, ServiceType, @@ -19,6 +22,59 @@ ) +logger = CustomLogger("text2image") +pipe = None +args = None +initialization_lock = threading.Lock() +initialized = False + + +def initialize(): + global pipe, args, initialized + with initialization_lock: + if not initialized: + # initialize model and tokenizer + if os.getenv("MODEL", None): + args.model_name_or_path = os.getenv("MODEL") + kwargs = {} + if args.bf16: + kwargs["torch_dtype"] = torch.bfloat16 + if not args.token: + args.token = os.getenv("HF_TOKEN") + if args.device == "hpu": + kwargs.update({ + "use_habana": True, + "use_hpu_graphs": args.use_hpu_graphs, + "gaudi_config": "Habana/stable-diffusion", + "token": args.token, + }) + # if "stable-diffusion-3" in args.model_name_or_path: + # from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline + + # pipe = GaudiStableDiffusion3Pipeline.from_pretrained( + # args.model_name_or_path, + # **kwargs, + # ) + if "stable-diffusion" in args.model_name_or_path.lower() or "flux" in args.model_name_or_path.lower(): + from optimum.habana.diffusers import AutoPipelineForText2Image + + pipe = AutoPipelineForText2Image.from_pretrained( + args.model_name_or_path, + **kwargs, + ) + else: + raise NotImplementedError( + "Only support stable-diffusion, stable-diffusion-xl, stable-diffusion-3 and flux now, " + + f"model {args.model_name_or_path} not supported." 
+ ) + elif args.device == "cpu": + pipe = DiffusionPipeline.from_pretrained(args.model_name_or_path, token=args.token, **kwargs) + else: + raise NotImplementedError(f"Only support cpu and hpu device now, device {args.device} not supported.") + logger.info("Stable Diffusion model initialized.") + initialized = True + + @register_microservice( name="opea_service@text2image", service_type=ServiceType.TEXT2IMAGE, @@ -29,18 +85,38 @@ output_datatype=SDOutputs, ) @register_statistics(names=["opea_service@text2image"]) -async def text2image(input: SDInputs): +def text2image(input: SDInputs): + initialize() start = time.time() - inputs = {"prompt": input.prompt, "num_images_per_prompt": input.num_images_per_prompt} - images = requests.post(url=f"{sd_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}).json()[ - "images" - ] + prompt = input.prompt + num_images_per_prompt = input.num_images_per_prompt + generator = torch.manual_seed(args.seed) + images = pipe(prompt, generator=generator, num_images_per_prompt=num_images_per_prompt).images + image_path = os.path.join(os.getcwd(), prompt.strip().replace(" ", "_").replace("/", "")) + os.makedirs(image_path, exist_ok=True) + results = [] + for i, image in enumerate(images): + save_path = os.path.join(image_path, f"image_{i+1}.png") + image.save(save_path) + with open(save_path, "rb") as f: + bytes = f.read() + b64_str = base64.b64encode(bytes).decode() + results.append(b64_str) statistics_dict["opea_service@text2image"].append_latency(time.time() - start, None) - return SDOutputs(images=images) + return SDOutputs(images=results) if __name__ == "__main__": - sd_endpoint = os.getenv("SD_ENDPOINT", "http://localhost:9378") - print("Text2image server started.") + parser = argparse.ArgumentParser() + parser.add_argument("--model_name_or_path", type=str, default="stabilityai/stable-diffusion-3-medium-diffusers") + parser.add_argument("--use_hpu_graphs", default=False, action="store_true") + parser.add_argument("--device", type=str, default="cpu") + parser.add_argument("--token", type=str, default=None) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--bf16", action="store_true") + + args = parser.parse_args() + + logger.info("Text2image server started.") opea_microservices["opea_service@text2image"].start() diff --git a/tests/text2image/test_text2image.sh b/tests/text2image/test_text2image.sh index bf77812b6..3e45bd441 100644 --- a/tests/text2image/test_text2image.sh +++ b/tests/text2image/test_text2image.sh @@ -10,14 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/sd:latest --build-arg MODEL=stabilityai/stable-diffusion-xl-base-1.0 -f comps/text2image/dependency/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/sd built fail" - exit 1 - else - echo "opea/sd built successful" - fi - docker build --no-cache -t opea/text2image:latest -f comps/text2image/Dockerfile . + docker build --no-cache -t opea/text2image:latest --build-arg MODEL=stabilityai/stable-diffusion-xl-base-1.0 -f comps/text2image/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/text2image built fail" exit 1 @@ -28,9 +21,8 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-text2image-sd" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9378:9378 --ipc=host opea/sd:latest - docker run -d --name="test-comps-text2image" -e SD_ENDPOINT=http://$ip_address:9378 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9379:9379 --ipc=host opea/text2image:latest - sleep 3m + docker run -d --name="test-comps-text2image" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9379:9379 --ipc=host opea/text2image:latest + sleep 30s } function validate_microservice() { @@ -39,7 +31,6 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-text2image-sd docker logs test-comps-text2image exit 1 fi From d4ba8c7e4a8069e7bd12dbd8690d50b556ee0069 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Sep 2024 07:27:25 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/text2image/requirements.txt | 2 +- comps/text2image/text2image.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/comps/text2image/requirements.txt b/comps/text2image/requirements.txt index b1fe57505..f639b4ba1 100644 --- a/comps/text2image/requirements.txt +++ b/comps/text2image/requirements.txt @@ -12,4 +12,4 @@ pydub shortuuid torch transformers -uvicorn \ No newline at end of file +uvicorn diff --git a/comps/text2image/text2image.py b/comps/text2image/text2image.py index a5b44b47b..9cb3caa97 100644 --- a/comps/text2image/text2image.py +++ b/comps/text2image/text2image.py @@ -21,7 +21,6 @@ statistics_dict, ) - logger = CustomLogger("text2image") pipe = None args = None @@ -42,12 +41,14 @@ def initialize(): if not args.token: args.token = os.getenv("HF_TOKEN") if args.device == "hpu": - kwargs.update({ - "use_habana": True, - "use_hpu_graphs": args.use_hpu_graphs, - "gaudi_config": "Habana/stable-diffusion", - "token": args.token, - }) + kwargs.update( + { + "use_habana": True, + "use_hpu_graphs": args.use_hpu_graphs, + "gaudi_config": "Habana/stable-diffusion", + "token": args.token, + } + ) # if "stable-diffusion-3" in args.model_name_or_path: # from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline @@ -117,6 +118,6 @@ def text2image(input: SDInputs): parser.add_argument("--bf16", action="store_true") args = parser.parse_args() - + logger.info("Text2image server started.") opea_microservices["opea_service@text2image"].start() From 618cb8164e452237a0ea108cf9ff421e54e32e27 Mon Sep 17 00:00:00 2001 From: "Ye, Xinyu" Date: Fri, 27 Sep 2024 04:21:33 -0400 Subject: [PATCH 7/7] fix test. 
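With the service unified in PATCH 5, /v1/text2image now returns base64-encoded PNGs in the "images" field of SDOutputs. A client-side decoding sketch, assuming the service listens on localhost:9379 (hypothetical usage, not part of the patch):

```python
# Decode the base64 PNGs returned by the unified text2image service (PATCH 5+).
import base64
import requests

resp = requests.post(
    "http://localhost:9379/v1/text2image",
    json={"prompt": "An astronaut riding a green horse", "num_images_per_prompt": 2},
    proxies={"http": None},
    timeout=600,
)
resp.raise_for_status()
for i, b64_png in enumerate(resp.json()["images"]):
    # File names mirror the server-side naming convention (image_1.png, image_2.png, ...).
    with open(f"image_{i + 1}.png", "wb") as f:
        f.write(base64.b64decode(b64_png))
```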
Signed-off-by: Ye, Xinyu --- comps/text2image/text2image.py | 14 +++++++------- tests/text2image/test_text2image.sh | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/comps/text2image/text2image.py b/comps/text2image/text2image.py index 9cb3caa97..050df49d5 100644 --- a/comps/text2image/text2image.py +++ b/comps/text2image/text2image.py @@ -49,14 +49,14 @@ def initialize(): "token": args.token, } ) - # if "stable-diffusion-3" in args.model_name_or_path: - # from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline + if "stable-diffusion-3" in args.model_name_or_path: + from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline - # pipe = GaudiStableDiffusion3Pipeline.from_pretrained( - # args.model_name_or_path, - # **kwargs, - # ) - if "stable-diffusion" in args.model_name_or_path.lower() or "flux" in args.model_name_or_path.lower(): + pipe = GaudiStableDiffusion3Pipeline.from_pretrained( + args.model_name_or_path, + **kwargs, + ) + elif "stable-diffusion" in args.model_name_or_path.lower() or "flux" in args.model_name_or_path.lower(): from optimum.habana.diffusers import AutoPipelineForText2Image pipe = AutoPipelineForText2Image.from_pretrained( diff --git a/tests/text2image/test_text2image.sh b/tests/text2image/test_text2image.sh index 3e45bd441..0872e6cb5 100644 --- a/tests/text2image/test_text2image.sh +++ b/tests/text2image/test_text2image.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/text2image:latest --build-arg MODEL=stabilityai/stable-diffusion-xl-base-1.0 -f comps/text2image/Dockerfile . + docker build --no-cache -t opea/text2image:latest -f comps/text2image/Dockerfile . if [ $? -ne 0 ]; then echo "opea/text2image built fail" exit 1 @@ -21,7 +21,7 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-text2image" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9379:9379 --ipc=host opea/text2image:latest + docker run -d --name="test-comps-text2image" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e MODEL=stabilityai/stable-diffusion-xl-base-1.0 -p 9379:9379 --ipc=host opea/text2image:latest sleep 30s }
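After the final patch, an end-to-end smoke test against a running container reduces to a one-liner — a sketch that assumes jq and coreutils base64 are available and the service is on localhost:9379:

```bash
# Request one image and write the first base64-encoded result to disk.
http_proxy="" curl -s http://localhost:9379/v1/text2image \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' \
  | jq -r '.images[0]' | base64 -d > astronaut.png
```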