diff --git a/.github/workflows/docker/compose/image2video-compose-cd.yaml b/.github/workflows/docker/compose/image2video-compose-cd.yaml new file mode 100644 index 000000000..a28c45d21 --- /dev/null +++ b/.github/workflows/docker/compose/image2video-compose-cd.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: image2video,svd,svd-gaudi +services: + image2video: + build: + dockerfile: comps/image2video/Dockerfile + image: ${REGISTRY:-opea}/image2video:${TAG:-latest} + svd: + build: + dockerfile: comps/image2video/dependency/Dockerfile + image: ${REGISTRY:-opea}/svd:${TAG:-latest} + svd-gaudi: + build: + dockerfile: comps/image2video/dependency/Dockerfile.intel_hpu + image: ${REGISTRY:-opea}/svd-gaudi:${TAG:-latest} diff --git a/comps/__init__.py b/comps/__init__.py index 85c8456c0..e9f8db761 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -21,6 +21,9 @@ GraphDoc, LVMDoc, LVMVideoDoc, + ImagePath, + ImagesPath, + VideoPath, ImageDoc, TextImageDoc, MultimodalDoc, diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index b95a56b08..90abf1c73 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -29,6 +29,8 @@ class ServiceType(Enum): LVM = 12 KNOWLEDGE_GRAPH = 13 WEB_RETRIEVER = 14 + IMAGE2VIDEO = 15 + TEXT2IMAGE = 16 class MegaServiceEndpoint(Enum): diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 3c97880a3..c02ad6939 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -249,3 +249,15 @@ class LVMVideoDoc(BaseDoc): chunk_duration: float prompt: str max_new_tokens: conint(ge=0, le=1024) = 512 + + +class ImagePath(BaseDoc): + image_path: str + + +class ImagesPath(BaseDoc): + images_path: DocList[ImagePath] + + +class VideoPath(BaseDoc): + video_path: str diff --git a/comps/image2video/Dockerfile 
b/comps/image2video/Dockerfile new file mode 100644 index 000000000..9adef002c --- /dev/null +++ b/comps/image2video/Dockerfile @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/image2video/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/image2video + +ENTRYPOINT ["python", "image2video.py"] \ No newline at end of file diff --git a/comps/image2video/README.md b/comps/image2video/README.md new file mode 100644 index 000000000..a8ec7926c --- /dev/null +++ b/comps/image2video/README.md @@ -0,0 +1,83 @@ +# Image-to-Video Microservice + +Image-to-Video is a task that generates a video conditioned on the provided image(s). This microservice supports the image-to-video task by using the Stable Video Diffusion (SVD) model. + +# 🚀1. Start Microservice with Python (Option 1) + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +pip install -r dependency/requirements.txt +``` + +## 1.2 Start SVD Service + +```bash +# Start SVD service +cd dependency/ +python svd_server.py +``` + +## 1.3 Start Image-to-Video Microservice + +```bash +cd .. +# Start the OPEA Microservice +python image2video.py +``` + +# 🚀2. Start Microservice with Docker (Option 2) + +## 2.1 Build Images + +### 2.1.1 SVD Server Image + +Build SVD server image on Xeon with below command: + +```bash +cd ../.. +docker build -t opea/svd:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/image2video/dependency/Dockerfile . +``` + +Build SVD server image on Gaudi with below command: + +```bash +cd ../.. +docker build -t opea/svd-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/image2video/dependency/Dockerfile.intel_hpu . 
+``` + +### 2.1.2 Image-to-Video Service Image + +```bash +docker build -t opea/image2video:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/image2video/Dockerfile . +``` + +## 2.2 Start SVD and Image-to-Video Service + +### 2.2.1 Start SVD server + +Start SVD server on Xeon with below command: + +```bash +docker run --ipc=host -p 9368:9368 -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/svd:latest +``` + +Start SVD server on Gaudi with below command: + +```bash +docker run -p 9368:9368 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/svd-gaudi:latest +``` + +### 2.2.2 Start Image-to-Video service + +```bash +ip_address=$(hostname -I | awk '{print $1}') +docker run -p 9369:9369 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e SVD_ENDPOINT=http://$ip_address:9368 opea/image2video:latest +``` + +### 2.2.3 Test + +```bash +http_proxy="" curl http://localhost:9369/v1/image2video -XPOST -d '{"images_path":[{"image_path":"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png"}]}' -H 'Content-Type: application/json' +``` diff --git a/comps/image2video/__init__.py b/comps/image2video/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/image2video/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/image2video/dependency/Dockerfile b/comps/image2video/dependency/Dockerfile new file mode 100644 index 000000000..1ae4dfb75 --- /dev/null +++ b/comps/image2video/dependency/Dockerfile @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +ARG ARCH="cpu" + +COPY comps /home/comps + +RUN apt-get update && 
apt-get install python3-opencv -y && \ + pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/comps/image2video/dependency/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/image2video/dependency + +ENTRYPOINT ["python", "svd_server.py"] \ No newline at end of file diff --git a/comps/image2video/dependency/Dockerfile.intel_hpu b/comps/image2video/dependency/Dockerfile.intel_hpu new file mode 100644 index 000000000..050a8c391 --- /dev/null +++ b/comps/image2video/dependency/Dockerfile.intel_hpu @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# HABANA environment +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN chown -R user /home/user/comps/image2video + +RUN rm -rf /etc/ssh/ssh_host* +USER user +# Set environment variables +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana + +# Install requirements and optimum habana +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/image2video/dependency/requirements.txt && \ + pip install --no-cache-dir optimum[habana] + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/image2video/dependency + +ENTRYPOINT ["python", "svd_server.py", "--device", "hpu"] \ No newline at end of file diff --git a/comps/image2video/dependency/requirements.txt b/comps/image2video/dependency/requirements.txt new file mode 100644 index 000000000..120fd7721 --- /dev/null +++ b/comps/image2video/dependency/requirements.txt @@ -0,0 +1,7 @@ +accelerate +diffusers +fastapi +opencv-python 
@app.post("/generate")
async def generate(request: Request) -> Response:
    """Generate a video from the images listed in the request and return its path.

    The request body must be a JSON object whose ``images_path`` entry is a
    non-empty list of strings; each string is handed to ``load_image``.
    The function relies on the module-level ``args`` and ``pipe`` objects that
    the ``__main__`` section creates before the server starts.

    Args:
        request: Incoming request carrying the JSON payload.

    Returns:
        ``{"video_path": <path>}`` as JSON on success, or a 400 JSON response
        ``{"error": <message>}`` when the payload is not usable.
    """
    print("SVD generation begin.")
    try:
        request_dict = await request.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError; report it as a client error
        # instead of letting it surface as an unexplained 500.
        return JSONResponse({"error": "Request body must be valid JSON."}, status_code=400)

    images_path = request_dict.get("images_path") if isinstance(request_dict, dict) else None
    is_valid = (
        isinstance(images_path, list)
        and bool(images_path)
        and all(isinstance(path, str) for path in images_path)
    )
    if not is_valid:
        # A bare string would otherwise be iterated character by character.
        return JSONResponse(
            {"error": "'images_path' must be a non-empty list of image path strings."},
            status_code=400,
        )

    start = time.time()
    # Load every image and resize it to 1024x576 before handing it to the pipeline.
    images = [load_image(path).resize((1024, 576)) for path in images_path]

    # An explicit torch generator is only created for the CPU device; the HPU
    # path seeds itself once at start-up.
    generator = torch.manual_seed(args.seed) if args.device == "cpu" else None
    frames = pipe(images, decode_chunk_size=8, generator=generator).frames[0]

    # The output name comes from the command line, so each request writes to
    # the same file, resolved against the current working directory.
    video_path = os.path.join(os.getcwd(), args.video_path)
    export_to_video(frames, video_path, fps=7)
    end = time.time()
    print(f"SVD video output in {video_path}, time = {end-start}s")
    return JSONResponse({"video_path": video_path})
@register_microservice(
    name="opea_service@image2video",
    service_type=ServiceType.IMAGE2VIDEO,
    endpoint="/v1/image2video",
    host="0.0.0.0",
    port=9369,
    input_datatype=ImagesPath,
    output_datatype=VideoPath,
)
@register_statistics(names=["opea_service@image2video"])
async def image2video(input: ImagesPath):
    """Forward the requested image paths to the SVD server and return the video path.

    Args:
        input: Document whose ``images_path`` entries each carry an
            ``image_path`` string.

    Returns:
        A ``VideoPath`` holding the ``video_path`` value reported by the SVD
        server's ``/generate`` endpoint.

    Raises:
        requests.HTTPError: If the SVD server answers with an error status.
    """
    start = time.time()
    # Resolve the endpoint here: the module-level ``svd_endpoint`` is only
    # assigned when this file is executed as a script, so relying on it would
    # raise NameError whenever the module is imported instead.
    endpoint = os.getenv("SVD_ENDPOINT", "http://localhost:9368")
    images_path = [img.image_path for img in input.images_path]

    # ``json=`` serializes the payload and sets the JSON Content-Type header.
    response = requests.post(
        url=f"{endpoint}/generate",
        json={"images_path": images_path},
        proxies={"http": None},
    )
    # Fail with the backend's HTTP status rather than an unrelated JSON/KeyError
    # when the SVD server could not produce a video.
    response.raise_for_status()
    video_path = response.json()["video_path"]

    statistics_dict["opea_service@image2video"].append_latency(time.time() - start, None)
    return VideoPath(video_path=video_path)
# Launch the SVD backend and the image2video microservice containers, then
# wait a fixed three minutes before the caller starts sending requests.
function start_service() {
    unset http_proxy
    docker run -d --name="test-comps-image2video-svd" \
        -e http_proxy=$http_proxy -e https_proxy=$https_proxy \
        -p 9368:9368 --ipc=host opea/svd:latest
    docker run -d --name="test-comps-image2video" \
        -e SVD_ENDPOINT=http://$ip_address:9368 \
        -e http_proxy=$http_proxy -e https_proxy=$https_proxy \
        -p 9369:9369 --ipc=host opea/image2video:latest
    sleep 3m
}

# Post one sample image to the microservice and check that the reply names the
# generated video file; on failure dump both container logs and abort.
function validate_microservice() {
    response=$(http_proxy="" curl http://localhost:9369/v1/image2video -XPOST -d '{"images_path":[{"image_path":"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png"}]}' -H 'Content-Type: application/json')
    if [[ $response == *"generated.mp4"* ]]; then
        echo "Result correct."
        return
    fi

    echo "Result wrong."
    docker logs test-comps-image2video-svd
    docker logs test-comps-image2video
    exit 1
}

# Stop and remove every container whose name matches the test prefix.
function stop_docker() {
    container_ids=$(docker ps -aq --filter "name=test-comps-image2video*")
    if [[ -n "$container_ids" ]]; then
        # Left unquoted on purpose so several ids expand to separate arguments.
        docker stop $container_ids && docker rm $container_ids && sleep 1s
    fi
}

# Test driver: clean up leftovers, build, start, validate, then clean up again.
function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main