-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add image2video microservice (Stable Video Diffusion) (#465)
* added image2video microservice. Signed-off-by: Xinyu Ye <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * addition changes. Signed-off-by: Xinyu Ye <[email protected]> * minor changes Signed-off-by: Xinyu Ye <[email protected]> * added ut test Signed-off-by: Ye, Xinyu <[email protected]> * unified path. Signed-off-by: Ye, Xinyu <[email protected]> * added gaudi support for svd. Signed-off-by: Ye, Xinyu <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug Signed-off-by: Ye, Xinyu <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix ut Signed-off-by: Ye, Xinyu <[email protected]> * add docker image release file Signed-off-by: Ye, Xinyu <[email protected]> --------- Signed-off-by: Xinyu Ye <[email protected]> Signed-off-by: Ye, Xinyu <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chen, suyue <[email protected]> Co-authored-by: lvliang-intel <[email protected]> Co-authored-by: kevinintel <[email protected]>
- Loading branch information
1 parent
66afdb4
commit a03e7a5
Showing
14 changed files
with
399 additions
and
0 deletions.
There are no files selected for viewing
18 changes: 18 additions & 0 deletions
18
.github/workflows/docker/compose/image2video-compose-cd.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# this file should be run in the root of the repo | ||
# images used by GenAIExamples: image2video,svd,svd-gaudi | ||
# Build definitions for the three images this component ships: the
# image2video microservice, the CPU SVD server, and the Gaudi (HPU) SVD server.
# Dockerfile paths are relative to the repository root (see the note above).
# Image names fall back to registry "opea" and tag "latest" when REGISTRY / TAG
# are not set in the environment.
services: | ||
image2video: | ||
build: | ||
dockerfile: comps/image2video/Dockerfile | ||
image: ${REGISTRY:-opea}/image2video:${TAG:-latest} | ||
svd: | ||
build: | ||
dockerfile: comps/image2video/dependency/Dockerfile | ||
image: ${REGISTRY:-opea}/svd:${TAG:-latest} | ||
svd-gaudi: | ||
build: | ||
dockerfile: comps/image2video/dependency/Dockerfile.intel_hpu | ||
image: ${REGISTRY:-opea}/svd-gaudi:${TAG:-latest} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Image for the image2video microservice front end (no model weights or torch
# are installed here; only the packages listed in image2video/requirements.txt).
FROM python:3.11-slim | ||
|
||
# Set environment variables | ||
ENV LANG=en_US.UTF-8 | ||
|
||
# The whole comps package is copied, not just image2video, because the service
# imports from the top-level `comps` package.
COPY comps /home/comps | ||
|
||
RUN pip install --no-cache-dir --upgrade pip && \ | ||
pip install --no-cache-dir -r /home/comps/image2video/requirements.txt | ||
|
||
# /home is put on PYTHONPATH so that /home/comps resolves as `import comps`.
ENV PYTHONPATH=$PYTHONPATH:/home | ||
|
||
WORKDIR /home/comps/image2video | ||
|
||
# Resolved relative to WORKDIR, i.e. /home/comps/image2video/image2video.py.
ENTRYPOINT ["python", "image2video.py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# Image-to-Video Microservice | ||
|
||
Image-to-Video is a task that generates a video conditioned on the provided image(s). This microservice supports the image-to-video task by using the Stable Video Diffusion (SVD) model. | ||
|
||
# 🚀1. Start Microservice with Python (Option 1) | ||
|
||
## 1.1 Install Requirements | ||
|
||
```bash | ||
pip install -r requirements.txt | ||
pip install -r dependency/requirements.txt | ||
``` | ||
|
||
## 1.2 Start SVD Service | ||
|
||
```bash | ||
# Start SVD service | ||
cd dependency/ | ||
python svd_server.py | ||
``` | ||
|
||
## 1.3 Start Image-to-Video Microservice | ||
|
||
```bash | ||
cd .. | ||
# Start the OPEA Microservice | ||
python image2video.py | ||
``` | ||
|
||
# 🚀2. Start Microservice with Docker (Option 2) | ||
|
||
## 2.1 Build Images | ||
|
||
### 2.1.1 SVD Server Image | ||
|
||
Build the SVD server image on Xeon with the command below: | ||
|
||
```bash | ||
cd ../.. | ||
docker build -t opea/svd:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/image2video/dependency/Dockerfile . | ||
``` | ||
|
||
Build the SVD server image on Gaudi with the command below: | ||
|
||
```bash | ||
cd ../.. | ||
docker build -t opea/svd-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/image2video/dependency/Dockerfile.intel_hpu . | ||
``` | ||
|
||
### 2.1.2 Image-to-Video Service Image | ||
|
||
```bash | ||
docker build -t opea/image2video:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/image2video/Dockerfile . | ||
``` | ||
|
||
## 2.2 Start SVD and Image-to-Video Service | ||
|
||
### 2.2.1 Start SVD server | ||
|
||
Start the SVD server on Xeon with the command below: | ||
|
||
```bash | ||
docker run --ipc=host -p 9368:9368 -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/svd:latest | ||
``` | ||
|
||
Start the SVD server on Gaudi with the command below: | ||
|
||
```bash | ||
docker run -p 9368:9368 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/svd-gaudi:latest | ||
``` | ||
|
||
### 2.2.2 Start Image-to-Video service | ||
|
||
```bash | ||
ip_address=$(hostname -I | awk '{print $1}') | ||
docker run -p 9369:9369 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e SVD_ENDPOINT=http://$ip_address:9368 opea/image2video:latest | ||
``` | ||
|
||
### 2.2.3 Test | ||
|
||
```bash | ||
http_proxy="" curl http://localhost:9369/v1/image2video -XPOST -d '{"images_path":[{"image_path":"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png"}]}' -H 'Content-Type: application/json' | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Image for the stand-alone Stable Video Diffusion server (CPU build).
FROM python:3.11-slim

# Set environment variables
ENV LANG=en_US.UTF-8

# Target architecture; the default "cpu" installs the CPU-only PyTorch wheels.
ARG ARCH="cpu"

COPY comps /home/comps

# - "${ARCH}" is quoted so an empty build-arg does not turn the `[` test into a
#   syntax error and abort the build.
# - The torch install uses --no-cache-dir like every other pip call here, so the
#   wheel cache does not end up in the layer.
# - The apt package lists are removed in the same layer that created them.
RUN apt-get update && apt-get install python3-opencv -y && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --no-cache-dir --upgrade pip && \
    if [ "${ARCH}" = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \
    pip install --no-cache-dir -r /home/comps/image2video/dependency/requirements.txt

# /home is put on PYTHONPATH so that /home/comps resolves as `import comps`.
ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/image2video/dependency

ENTRYPOINT ["python", "svd_server.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# HABANA environment
# Upstream base image, kept for reference:
# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu
FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 AS hpu

# Create an unprivileged user; the server runs as this user (see USER below).
RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

COPY comps /home/user/comps

RUN chown -R user /home/user/comps/image2video

# Remove the SSH host keys that come with the base image.
RUN rm -rf /etc/ssh/ssh_host*
USER user

# Set environment variables
ENV LANG=en_US.UTF-8
# /home/user makes /home/user/comps importable as `comps`. It is set once here;
# the original appended /home/user a second time after the pip step.
ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana

# Install requirements and optimum habana.
# The extra is quoted so the shell never treats the brackets as a glob pattern.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/image2video/dependency/requirements.txt && \
    pip install --no-cache-dir "optimum[habana]"

WORKDIR /home/user/comps/image2video/dependency

# Same server script as the CPU image, switched to the HPU code path.
ENTRYPOINT ["python", "svd_server.py", "--device", "hpu"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
accelerate | ||
diffusers | ||
fastapi | ||
opencv-python | ||
torch | ||
transformers | ||
uvicorn |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
"""Stand-alone Stable Video Diffusion FastAPI Server.""" | ||
|
||
import argparse | ||
import os | ||
import time | ||
|
||
import torch | ||
import uvicorn | ||
from diffusers import StableVideoDiffusionPipeline | ||
from diffusers.utils import export_to_video, load_image | ||
from fastapi import FastAPI, Request | ||
from fastapi.responses import JSONResponse, Response | ||
|
||
app = FastAPI() | ||
|
||
|
||
@app.post("/generate")
async def generate(request: Request) -> Response:
    """Generate a video from the images referenced in the request body.

    The JSON body must be an object with an ``images_path`` key holding a
    non-empty list of image locations accepted by ``diffusers.utils.load_image``.
    Every image is resized to 1024x576 before it is handed to the pipeline.

    Relies on the module-level ``args`` and ``pipe`` objects that are created
    in the ``__main__`` section of this file.

    Returns:
        A JSON response ``{"video_path": <absolute path>}`` on success, or a
        400 response with an ``error`` message when ``images_path`` is missing,
        empty, or not a list.
    """
    print("SVD generation begin.")
    request_dict = await request.json()

    # Reject malformed bodies explicitly instead of failing with a KeyError /
    # AttributeError that the client only sees as an opaque 500.
    images_path = request_dict.get("images_path") if isinstance(request_dict, dict) else None
    if not isinstance(images_path, list) or not images_path:
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must contain a non-empty 'images_path' list."},
        )

    start = time.time()
    images = [load_image(img).resize((1024, 576)) for img in images_path]

    # Only the CPU path seeds a torch generator per request; other devices
    # pass generator=None to the pipeline.
    generator = torch.manual_seed(args.seed) if args.device == "cpu" else None
    frames = pipe(images, decode_chunk_size=8, generator=generator).frames[0]

    # NOTE: the output location depends only on --video_path and the working
    # directory, so every request writes to the same file.
    video_path = os.path.join(os.getcwd(), args.video_path)
    export_to_video(frames, video_path, fps=7)
    end = time.time()
    print(f"SVD video output in {video_path}, time = {end-start}s")
    return JSONResponse({"video_path": video_path})
|
||
|
||
if __name__ == "__main__":
    # Command-line options for the server, the model and the target device.
    # `args` and `pipe` are deliberately left at module level: the /generate
    # handler reads both as globals.
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", type=str, default="0.0.0.0")
    cli.add_argument("--port", type=int, default=9368)
    cli.add_argument(
        "--model_name_or_path",
        type=str,
        default="stabilityai/stable-video-diffusion-img2vid-xt",
    )
    cli.add_argument("--video_path", type=str, default="generated.mp4")
    cli.add_argument("--use_hpu_graphs", default=False, action="store_true")
    cli.add_argument("--device", type=str, default="cpu")
    cli.add_argument("--seed", type=int, default=42)
    args = cli.parse_args()

    # Fail fast on anything other than the two supported devices.
    if args.device not in ("cpu", "hpu"):
        raise NotImplementedError(f"Only support cpu and hpu device now, device {args.device} not supported.")

    if args.device == "cpu":
        pipe = StableVideoDiffusionPipeline.from_pretrained(args.model_name_or_path)
    else:
        # The optimum-habana imports happen only on the hpu path.
        from optimum.habana.diffusers import GaudiEulerDiscreteScheduler, GaudiStableVideoDiffusionPipeline
        from optimum.habana.utils import set_seed

        set_seed(args.seed)
        scheduler = GaudiEulerDiscreteScheduler.from_pretrained(args.model_name_or_path, subfolder="scheduler")
        pipe = GaudiStableVideoDiffusionPipeline.from_pretrained(
            args.model_name_or_path,
            scheduler=scheduler,
            use_habana=True,
            use_hpu_graphs=args.use_hpu_graphs,
            gaudi_config="Habana/stable-diffusion",
        )
    print("Stable Video Diffusion model initialized.")

    # Serve the FastAPI app once the pipeline has been built.
    uvicorn.run(app, host=args.host, port=args.port, log_level="debug")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
||
import json | ||
import os | ||
import time | ||
|
||
import requests | ||
|
||
from comps import ( | ||
ImagesPath, | ||
ServiceType, | ||
VideoPath, | ||
opea_microservices, | ||
register_microservice, | ||
register_statistics, | ||
statistics_dict, | ||
) | ||
|
||
|
||
@register_microservice(
    name="opea_service@image2video",
    service_type=ServiceType.IMAGE2VIDEO,
    endpoint="/v1/image2video",
    host="0.0.0.0",
    port=9369,
    input_datatype=ImagesPath,
    output_datatype=VideoPath,
)
@register_statistics(names=["opea_service@image2video"])
async def image2video(input: ImagesPath):
    """Forward an image-to-video request to the SVD server and return its result.

    Args:
        input: Request payload; the ``image_path`` of every entry in
            ``input.images_path`` is sent to the SVD server.

    Returns:
        A ``VideoPath`` carrying the ``video_path`` reported by the SVD server.

    Raises:
        requests.HTTPError: If the SVD server answers with an error status.
    """
    start = time.time()

    # Resolve the endpoint here rather than relying on a global that is only
    # bound under ``if __name__ == "__main__"``; otherwise importing this
    # module (instead of running it as a script) fails with NameError.
    endpoint = os.getenv("SVD_ENDPOINT", "http://localhost:9368")

    inputs = {"images_path": [img.image_path for img in input.images_path]}
    # NOTE: requests.post is a blocking call inside an async handler.
    response = requests.post(url=f"{endpoint}/generate", data=json.dumps(inputs), proxies={"http": None})
    # Surface backend failures as HTTP errors instead of a confusing KeyError
    # or JSON decode error further down.
    response.raise_for_status()
    video_path = response.json()["video_path"]

    statistics_dict["opea_service@image2video"].append_latency(time.time() - start, None)
    return VideoPath(video_path=video_path)
|
||
|
||
if __name__ == "__main__":
    # Base URL of the SVD server; override with the SVD_ENDPOINT environment
    # variable, otherwise the local default port 9368 is used.
    svd_endpoint = os.environ.get("SVD_ENDPOINT", "http://localhost:9368")
    print("Image2video server started.")
    opea_microservices["opea_service@image2video"].start()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
datasets | ||
docarray[full] | ||
fastapi | ||
opentelemetry-api | ||
opentelemetry-exporter-otlp | ||
opentelemetry-sdk | ||
prometheus-fastapi-instrumentator | ||
pydantic==2.7.2 | ||
pydub | ||
shortuuid | ||
uvicorn |
Oops, something went wrong.