-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This PR: - gives each test file a unique name - adds a test for each node's main function that runs it up to the dora-rs node initialization error, in order to catch all dependency issues.
- Loading branch information
Showing
32 changed files
with
605 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,32 @@ | ||
#!/bin/bash
# Lint, build and test every node found under node-hub/.
#
# Python nodes (directories with a pyproject.toml) are installed with pip and
# then checked with black, pylint and pytest through poetry. Rust nodes
# (directories with a Cargo.toml) are built and tested with cargo.

# Abort on the first failing command, on use of an unset variable and on a
# failure anywhere in a pipeline. A bare `set -euo` leaves `-o` without an
# option name, which only prints the current option table.
set -euo pipefail

# List of ignored modules
ignored_folders=("dora-internvl" "dora-parler" "dora-keyboard" "dora-microphone" "terminal-input")

for dir in node-hub/*/ ; do
    # Get the base name of the directory (without the path)
    base_dir=$(basename "$dir")

    # Check if the directory name is in the ignored list. The list is joined
    # with spaces and padded so that only whole names can match.
    if [[ " ${ignored_folders[*]} " == *" ${base_dir} "* ]]; then
        echo "Skipping $base_dir as there is a hf model fetching issue..."
        continue
    fi

    if [ -d "$dir" ]; then
        if [ -f "$dir/pyproject.toml" ]; then
            echo "Running linting and tests for Python project in $dir..."
            (cd "$dir" && pip install .)
            (cd "$dir" && poetry run black --check .)
            # NOTE(review): globstar is not enabled, so `**/*.py` expands like
            # `*/*.py` (one directory level only) - confirm this is intended.
            (cd "$dir" && poetry run pylint --disable=C,R --ignored-modules=cv2 **/*.py)
            (cd "$dir" && poetry run pytest)
        fi

        if [ -f "$dir/Cargo.toml" ]; then
            echo "Running build and tests for Rust project in $dir..."
            (cd "$dir" && cargo build)
            (cd "$dir" && cargo test)
        fi
    fi
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import pytest | ||
|
||
|
||
def test_import_main():
    """Smoke-test the dora_distil_whisper entry point outside of a dataflow.

    Importing and calling the entry point exercises the node's imports, so a
    missing dependency shows up as a test failure.
    """
    from dora_distil_whisper.main import main as run_node

    # No dora dataflow is running here, so the call is expected to stop with
    # the RuntimeError raised by dora.
    with pytest.raises(RuntimeError):
        run_node()
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import pytest | ||
|
||
|
||
def test_import_main():
    """Smoke-test the dora_echo entry point outside of a dataflow.

    Importing and calling the entry point exercises the node's imports, so a
    missing dependency shows up as a test failure.
    """
    from dora_echo.main import main as run_node

    # No dora dataflow is running here, so the call is expected to stop with
    # the RuntimeError raised by dora.
    with pytest.raises(RuntimeError):
        run_node()
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Dora VLM | ||
|
||
Experimental node for using a VLM within dora. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import os

# The README is looked up one directory above this package.
_package_dir = os.path.dirname(__file__)
_project_dir = os.path.dirname(_package_dir)
readme_path = os.path.join(_project_dir, "README.md")

# Use the README text as the package docstring, falling back to a placeholder
# when the file is not present.
try:
    with open(readme_path, encoding="utf-8") as readme_file:
        __doc__ = readme_file.read()
except FileNotFoundError:
    __doc__ = "README file not found."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
import os | ||
from dora import Node | ||
import numpy as np | ||
import pyarrow as pa | ||
import torch | ||
import torchvision.transforms as T | ||
from PIL import Image | ||
from torchvision.transforms.functional import InterpolationMode | ||
from transformers import AutoModel, AutoTokenizer | ||
|
||
IMAGENET_MEAN = (0.485, 0.456, 0.406) | ||
IMAGENET_STD = (0.229, 0.224, 0.225) | ||
|
||
|
||
def build_transform(input_size):
    """Build the preprocessing pipeline applied to every image tile.

    The pipeline converts the image to RGB when it is not already RGB, resizes
    it to an ``input_size`` x ``input_size`` square with bicubic interpolation,
    converts it to a tensor and normalizes it with ``IMAGENET_MEAN`` and
    ``IMAGENET_STD``.

    Args:
        input_size: Edge length, in pixels, of the square output.

    Returns:
        The composed transform.
    """

    def _ensure_rgb(img):
        # Images that are already RGB are passed through untouched.
        if img.mode != "RGB":
            return img.convert("RGB")
        return img

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)
|
||
|
||
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    """Pick the candidate grid whose aspect ratio is nearest to the image's.

    Args:
        aspect_ratio: Aspect ratio of the source image.
        target_ratios: Candidate pairs; each is compared through
            ``pair[0] / pair[1]``. Candidates are visited in order.
        width: Source image width, used only to break ties.
        height: Source image height, used only to break ties.
        image_size: Edge length of one tile, used only to break ties.

    Returns:
        The best candidate, or ``(1, 1)`` when ``target_ratios`` is empty.
    """
    chosen = (1, 1)
    smallest_gap = float("inf")
    image_area = width * height

    for candidate in target_ratios:
        gap = abs(aspect_ratio - candidate[0] / candidate[1])
        if gap < smallest_gap:
            smallest_gap, chosen = gap, candidate
        elif (
            gap == smallest_gap
            and image_area > 0.5 * image_size * image_size * candidate[0] * candidate[1]
        ):
            # On a tie, a later candidate wins only when the source image
            # covers more than half of that candidate's total tile area.
            chosen = candidate

    return chosen
|
||
|
||
def dynamic_preprocess(
    image, min_num=1, max_num=12, image_size=448, use_thumbnail=False
):
    """Resize an image to the best-fitting tile grid and cut it into tiles.

    Args:
        image: Source image; must provide ``size``, ``resize`` and ``crop``.
        min_num: Minimum number of tiles in a candidate grid.
        max_num: Maximum number of tiles in a candidate grid.
        image_size: Edge length, in pixels, of each square tile.
        use_thumbnail: When true and more than one tile was produced, append
            the whole image resized to a single ``image_size`` square.

    Returns:
        The tiles in row-major order, optionally followed by the thumbnail.

    Raises:
        ZeroDivisionError: If the image height is zero.
    """
    orig_width, orig_height = image.size
    # Aspect ratio of the source image.
    aspect_ratio = orig_width / orig_height

    # Every (columns, rows) grid whose tile count lies in [min_num, max_num],
    # ordered by tile count so smaller grids are considered first.
    target_ratios = sorted(
        {
            (i, j)
            for n in range(min_num, max_num + 1)
            for i in range(1, n + 1)
            for j in range(1, n + 1)
            if min_num <= i * j <= max_num
        },
        key=lambda x: x[0] * x[1],
    )

    # find the closest aspect ratio to the target
    columns, rows = find_closest_aspect_ratio(
        aspect_ratio, target_ratios, orig_width, orig_height, image_size
    )

    # calculate the target width and height
    target_width = image_size * columns
    target_height = image_size * rows
    blocks = columns * rows

    # resize the image, then split it tile by tile
    resized_img = image.resize((target_width, target_height))
    processed_images = []
    for index in range(blocks):
        row, col = divmod(index, columns)
        box = (
            col * image_size,
            row * image_size,
            (col + 1) * image_size,
            (row + 1) * image_size,
        )
        processed_images.append(resized_img.crop(box))

    if use_thumbnail and len(processed_images) != 1:
        thumbnail_img = image.resize((image_size, image_size))
        processed_images.append(thumbnail_img)
    return processed_images
|
||
|
||
def load_image(image_array: np.ndarray, input_size=448, max_num=12) -> torch.Tensor:
    """Turn an image array into a stacked tensor of preprocessed tiles.

    The array is converted to an RGB PIL image, split with
    ``dynamic_preprocess`` (thumbnail enabled) and every resulting image is
    passed through the transform from ``build_transform``.

    Args:
        image_array: Image data accepted by ``PIL.Image.fromarray``.
        input_size: Edge length, in pixels, of each tile.
        max_num: Maximum number of tiles, excluding the thumbnail.

    Returns:
        The transformed tiles stacked along a new first dimension.
    """
    image = Image.fromarray(image_array).convert("RGB")
    transform = build_transform(input_size=input_size)
    tiles = dynamic_preprocess(
        image, image_size=input_size, use_thumbnail=True, max_num=max_num
    )
    return torch.stack([transform(tile) for tile in tiles])
|
||
|
||
def _decode_frame(storage, metadata):
    """Convert a flat image buffer into an RGB ``(height, width, 3)`` array.

    Args:
        storage: Event value holding the pixel data; must provide
            ``to_numpy()``.
        metadata: Event metadata providing ``encoding``, ``width`` and
            ``height``.

    Returns:
        The frame as a ``uint8`` array in RGB channel order.

    Raises:
        RuntimeError: If the encoding is neither ``bgr8`` nor ``rgb8``.
    """
    encoding = metadata["encoding"]
    width = metadata["width"]
    height = metadata["height"]

    if encoding not in ("bgr8", "rgb8"):
        raise RuntimeError(f"Unsupported image encoding: {encoding}")

    # Both supported encodings use three 8-bit channels per pixel.
    frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
    if encoding == "bgr8":
        frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
    return frame


def main():
    """Run the InternVL node.

    The model named by the ``MODEL`` environment variable (default
    ``OpenGVLab/InternVL2-1B``) is loaded onto CUDA, then events are read from
    the dora node:

    * ``image`` inputs are decoded and kept as the current frame;
    * ``text`` inputs are used as the question about the current frame, and
      the model's answer is sent on the ``text`` output;
    * ``ERROR`` events are re-raised.

    Raises:
        RuntimeError: On an ``ERROR`` event or an unsupported image encoding.
    """
    # The checkpoint can be overridden through the MODEL environment variable.
    model_path = os.getenv("MODEL", "OpenGVLab/InternVL2-1B")

    # The whole model is loaded in bfloat16 and moved to the default CUDA
    # device; loading across multiple GPUs is not handled here.
    model = (
        AutoModel.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            use_flash_attn=True,
            trust_remote_code=True,
        )
        .eval()
        .cuda()
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_path, trust_remote_code=True, use_fast=False
    )

    node = Node()

    # Default prompt; every "text" input replaces it before generation.
    question = "<image>\nPlease describe the image shortly."
    frame = None
    pa.array([])  # initialize pyarrow array

    for event in node:
        event_type = event["type"]

        if event_type == "INPUT":
            event_id = event["id"]

            if event_id == "image":
                storage = event["value"]
                # Kept so it can be forwarded with the next text output.
                metadata = event["metadata"]
                frame = _decode_frame(storage, metadata)

            elif event_id == "text":
                question = "<image>\n" + event["value"][0].as_py()
                # Questions received before the first image are dropped.
                if frame is not None:
                    # set the max number of tiles in `max_num`
                    pixel_values = (
                        load_image(frame, max_num=12).to(torch.bfloat16).cuda()
                    )
                    generation_config = dict(max_new_tokens=1024, do_sample=True)
                    response = model.chat(
                        tokenizer, pixel_values, question, generation_config
                    )
                    # The answer carries the metadata of the latest image.
                    node.send_output(
                        "text",
                        pa.array([response]),
                        metadata,
                    )

        elif event_type == "ERROR":
            raise RuntimeError(event["error"])


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
[tool.poetry]
name = "dora-internvl"
version = "0.3.6"
# NOTE(review): the e-mail addresses look redacted in this view; confirm the
# real addresses are present in the repository.
authors = [
    "Haixuan Xavier Tao <[email protected]>",
    "Enzo Le Van <[email protected]>",
]
description = "Dora Node for VLM"
readme = "README.md"

packages = [{ include = "dora_internvl" }]

[tool.poetry.dependencies]
# torch 2.2 and pillow 10 require Python 3.8 or newer, so the project cannot
# be resolved for Python 3.7.
python = "^3.8"
dora-rs = "^0.3.6"
numpy = "< 2.0.0"
torch = "^2.2.0"
torchvision = "^0.17"
transformers = "^4.11.3"
pillow = "^10.0.0"
bitsandbytes = "^0.41.0"
einops = "^0.6.1"
einops-exts = "^0.0.4"
timm = "^0.9.12"
sentencepiece = "^0.1.99"

[tool.poetry.scripts]
dora-internvl = "dora_internvl.main:main"

[build-system]
requires = ["poetry-core>=1.8.0"]
build-backend = "poetry.core.masonry.api"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import pytest | ||
|
||
|
||
def test_import_main():
    """Smoke-test the dora_internvl entry point outside of a dataflow.

    Importing and calling the entry point exercises the node's imports, so a
    missing dependency shows up as a test failure.
    """
    from dora_internvl.main import main as run_node

    # No dora dataflow is running here, so the call is expected to stop with
    # a RuntimeError.
    # NOTE(review): main() loads the model and moves it to CUDA before it
    # creates the dora Node, so the error may come from that step instead.
    with pytest.raises(RuntimeError):
        run_node()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import pytest | ||
|
||
|
||
def test_import_main():
    """Smoke-test the dora_keyboard entry point outside of a dataflow.

    Importing and calling the entry point exercises the node's imports, so a
    missing dependency shows up as a test failure.
    """
    from dora_keyboard.main import main as run_node

    # No dora dataflow is running here, so the call is expected to stop with
    # the RuntimeError raised by dora.
    with pytest.raises(RuntimeError):
        run_node()
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.