Commit

handle no vision
Josh-XT committed Feb 11, 2025
1 parent 6e85550 commit 23c0fd8
Showing 3 changed files with 9 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .env
@@ -3,7 +3,7 @@ MAIN_GPU=0
 NGROK_TOKEN=
 EZLOCALAI_API_KEY=
 EZLOCALAI_URL=http://localhost:8091
-DEFAULT_MODEL=bartowski/Qwen2-VL-2B-Instruct-GGUF
+DEFAULT_MODEL=lmstudio-community/Qwen2-VL-7B-Instruct-GGUF
 IMG_ENABLED=false
 IMG_DEVICE=cpu
 SD_MODEL=stabilityai/sdxl-turbo
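The new default points at the 7B Qwen2-VL GGUF repository instead of the 2B build. Purely as an illustration of what that value refers to (not ezlocalai's actual loader), a repo ID like this can be resolved against the Hugging Face Hub with huggingface_hub; the Q4_K_M filename pattern below is an assumption:

import logging

from huggingface_hub import snapshot_download

# Assumption: DEFAULT_MODEL is a Hugging Face repo ID whose repo contains GGUF quantizations.
repo_id = "lmstudio-community/Qwen2-VL-7B-Instruct-GGUF"

# Download only one quantization; the "*Q4_K_M*.gguf" pattern is an illustrative choice.
local_dir = snapshot_download(repo_id=repo_id, allow_patterns=["*Q4_K_M*.gguf"])
logging.info(f"GGUF files downloaded to {local_dir}")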
35 changes: 8 additions & 27 deletions Pipes.py
@@ -166,38 +166,19 @@ async def get_response(self, data, completion_type="chat"):
                 else data["prompt"]
             )
             if images:
-                new_messages = [
+                data["messages"][-1]["content"] = [
                     {
-                        "role": "user",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": "Describe each stage of this image.",
-                            },
-                        ],
+                        "type": "text",
+                        "text": user_message,
                     }
                 ]
-                new_messages[0]["content"].extend(images)
+                data["messages"][-1]["content"].extend(images)
+            if completion_type == "chat":
                 try:
-                    image_description = self.llm.chat(messages=new_messages)
-                    print(
-                        f"Image Description: {image_description['choices'][0]['message']['content']}"
-                    )
-                    prompt = (
-                        f"\n\nSee the uploaded image description for any questions about the uploaded image. Act as if you can see the image based on the description. Do not mention 'uploaded image description' in response. Uploaded Image Description: {image_description['choices'][0]['message']['content']}\n\n{data['messages'][-1]['content'][0]['text']}"
-                        if completion_type == "chat"
-                        else f"\n\nSee the uploaded image description for any questions about the uploaded image. Act as if you can see the image based on the description. Do not mention 'uploaded image description' in response. Uploaded Image Description: {image_description['choices'][0]['message']['content']}\n\n{data['prompt']}"
-                    )
-                    print(f"Full Prompt: {prompt}")
-                    if completion_type == "chat":
-                        data["messages"][-1]["content"] = prompt
-                    else:
-                        data["prompt"] = prompt
+                    response = self.llm.chat(**data)
                 except:
-                    logging.warning(f"[VLM] Unable to read image from URL.")
-                    pass
-            if completion_type == "chat":
-                response = self.llm.chat(**data)
+                    data["messages"][-1]["content"] = user_message
+                    response = self.llm.chat(**data)
             else:
                 response = self.llm.completion(**data)
             generated_image = None
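The rewritten block drops the separate image-description pass: the last user message now carries a content list (one text part plus the image parts), and if the loaded model has no vision support and the chat call fails, the handler retries with the plain text message. A minimal standalone sketch of that pattern follows; the helper name chat_with_optional_vision, the generic chat callable standing in for self.llm.chat, and the OpenAI-style image_url part shown in the usage lines are illustrative assumptions, not code from this repo:

import logging


def chat_with_optional_vision(chat, messages, images, user_message):
    """Try a multimodal request first; fall back to text-only if the model has no vision."""
    if images:
        # Content list: one text part followed by the image parts.
        messages[-1]["content"] = [{"type": "text", "text": user_message}]
        messages[-1]["content"].extend(images)
    try:
        return chat(messages=messages)
    except Exception:
        # The model or backend rejected the image content; retry with plain text only.
        logging.warning("[VLM] No vision support detected, falling back to text-only.")
        messages[-1]["content"] = user_message
        return chat(messages=messages)


# Example call, with an assumed OpenAI-style image part:
images = [{"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}]
messages = [{"role": "user", "content": "What is in this picture?"}]
# response = chat_with_optional_vision(llm.chat, messages, images, "What is in this picture?")

In the commit itself the same idea is applied in place on data["messages"], with self.llm.chat(**data) as the call.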
1 change: 0 additions & 1 deletion docker-compose.yml
@@ -9,7 +9,6 @@ services:
       - WHISPER_MODEL=${WHISPER_MODEL-base.en}
       - IMG_ENABLED=${IMG_ENABLED-false}
       - IMG_DEVICE=${IMG_DEVICE-cpu}
-      - VISION_MODEL=${VISION_MODEL}
       - LLM_BATCH_SIZE=${LLM_BATCH_SIZE-1024}
       - SD_MODEL=${SD_MODEL}
     restart: unless-stopped
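With VISION_MODEL removed, the remaining entries still use Compose's ${VAR-default} substitution, which falls back to the default only when the variable is unset. That behaves like Python's os.environ.get; a small sketch of the equivalent lookups (variable names taken from the file above, nothing else assumed):

import os

# ${WHISPER_MODEL-base.en} resolves the same way as this lookup.
whisper_model = os.environ.get("WHISPER_MODEL", "base.en")
img_enabled = os.environ.get("IMG_ENABLED", "false")
llm_batch_size = int(os.environ.get("LLM_BATCH_SIZE", "1024"))
print(whisper_model, img_enabled, llm_batch_size)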
