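"""Template registry of vision-language models (VLMs), loading each checkpoint
from a local directory when available and from the Hugging Face Hub otherwise."""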
import os

import torch
from openai import OpenAI
from transformers import (
    LlavaNextProcessor, LlavaNextForConditionalGeneration,
    Qwen2VLForConditionalGeneration, Qwen2VLProcessor
)

# Device and dtype used when loading the checkpoints below.
device = "cpu"
torch_dtype = torch.float16
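# A GPU variant (a sketch, not part of the original template) would be:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
# float16 is kept here to match the original settings, though many float16
# ops are slow or unsupported on CPU.

# Registry of candidate VLMs. Each entry prefers the local checkpoint under
# models/vlms/ and falls back to the Hugging Face Hub ID when the local copy
# is missing. Commented-out entries are optional alternatives that can be
# enabled as needed.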
vlms_list = [
# {
# "type": "llava-next",
# "name": "llava-v1.6-mistral-7b-hf",
# "local_path": "models/vlms/llava-v1.6-mistral-7b-hf",
# "processor": LlavaNextProcessor.from_pretrained(
# "models/vlms/llava-v1.6-mistral-7b-hf"
# ) if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else LlavaNextProcessor.from_pretrained(
# "llava-hf/llava-v1.6-mistral-7b-hf"
# ),
# "model": LlavaNextForConditionalGeneration.from_pretrained(
# "models/vlms/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
# ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else
# LlavaNextForConditionalGeneration.from_pretrained(
# "llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
# ).to("cpu"),
# },
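    # Enabled entry; the "(Preload)" suffix in the display name suggests this
    # LLaVA-NeXT model is loaded up front.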
{
"type": "llava-next",
"name": "llama3-llava-next-8b-hf (Preload)",
"local_path": "models/vlms/llama3-llava-next-8b-hf",
"processor": LlavaNextProcessor.from_pretrained(
"models/vlms/llama3-llava-next-8b-hf"
) if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else LlavaNextProcessor.from_pretrained(
"llava-hf/llama3-llava-next-8b-hf"
),
"model": LlavaNextForConditionalGeneration.from_pretrained(
"models/vlms/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
).to("cpu") if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else
LlavaNextForConditionalGeneration.from_pretrained(
"llava-hf/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
).to("cpu"),
},
# {
# "type": "llava-next",
# "name": "llava-v1.6-vicuna-13b-hf",
# "local_path": "models/vlms/llava-v1.6-vicuna-13b-hf",
# "processor": LlavaNextProcessor.from_pretrained(
# "models/vlms/llava-v1.6-vicuna-13b-hf"
# ) if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else LlavaNextProcessor.from_pretrained(
# "llava-hf/llava-v1.6-vicuna-13b-hf"
# ),
# "model": LlavaNextForConditionalGeneration.from_pretrained(
# "models/vlms/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
# ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else
# LlavaNextForConditionalGeneration.from_pretrained(
# "llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
# ).to("cpu"),
# },
# {
# "type": "llava-next",
# "name": "llava-v1.6-34b-hf",
# "local_path": "models/vlms/llava-v1.6-34b-hf",
# "processor": LlavaNextProcessor.from_pretrained(
# "models/vlms/llava-v1.6-34b-hf"
# ) if os.path.exists("models/vlms/llava-v1.6-34b-hf") else LlavaNextProcessor.from_pretrained(
# "llava-hf/llava-v1.6-34b-hf"
# ),
# "model": LlavaNextForConditionalGeneration.from_pretrained(
# "models/vlms/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
# ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-34b-hf") else
# LlavaNextForConditionalGeneration.from_pretrained(
# "llava-hf/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
# ).to("cpu"),
# },
# {
# "type": "qwen2-vl",
# "name": "Qwen2-VL-2B-Instruct",
# "local_path": "models/vlms/Qwen2-VL-2B-Instruct",
# "processor": Qwen2VLProcessor.from_pretrained(
# "models/vlms/Qwen2-VL-2B-Instruct"
# ) if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else Qwen2VLProcessor.from_pretrained(
# "Qwen/Qwen2-VL-2B-Instruct"
# ),
# "model": Qwen2VLForConditionalGeneration.from_pretrained(
# "models/vlms/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
# ).to("cpu") if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else
# Qwen2VLForConditionalGeneration.from_pretrained(
# "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
# ).to("cpu"),
# },
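    # Enabled entry; the "(Default)" suffix in the display name marks this
    # Qwen2-VL model as the default choice.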
{
"type": "qwen2-vl",
"name": "Qwen2-VL-7B-Instruct (Default)",
"local_path": "models/vlms/Qwen2-VL-7B-Instruct",
"processor": Qwen2VLProcessor.from_pretrained(
"models/vlms/Qwen2-VL-7B-Instruct"
) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else Qwen2VLProcessor.from_pretrained(
"Qwen/Qwen2-VL-7B-Instruct"
),
"model": Qwen2VLForConditionalGeneration.from_pretrained(
"models/vlms/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
).to("cpu") if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else
Qwen2VLForConditionalGeneration.from_pretrained(
"Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
).to("cpu"),
},
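    # GPT-4o is reached through the OpenAI API rather than a local checkpoint,
    # so the path, processor, and model fields are intentionally left empty.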
{
"type": "openai",
"name": "GPT4-o (Highly Recommended)",
"local_path": "",
"processor": "",
"model": ""
},
]
vlms_template = {
    entry["name"]: (entry["type"], entry["local_path"], entry["processor"], entry["model"])
    for entry in vlms_list
}
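# Example lookup (a minimal sketch; the dispatch on vlm_type is an assumption
# about how downstream code consumes this template):
#
#   vlm_type, local_path, processor, model = vlms_template["Qwen2-VL-7B-Instruct (Default)"]
#   if vlm_type == "openai":
#       client = OpenAI()  # served via the API; requires OPENAI_API_KEY to be set
#   else:
#       outputs = model.generate(...)  # processor/model are already on `device`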