Commit

handle no vision
Josh-XT committed Feb 11, 2025
1 parent 6e85550 commit 23c0fd8
Showing 3 changed files with 9 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .env
@@ -3,7 +3,7 @@ MAIN_GPU=0
 NGROK_TOKEN=
 EZLOCALAI_API_KEY=
 EZLOCALAI_URL=http://localhost:8091
-DEFAULT_MODEL=bartowski/Qwen2-VL-2B-Instruct-GGUF
+DEFAULT_MODEL=lmstudio-community/Qwen2-VL-7B-Instruct-GGUF
 IMG_ENABLED=false
 IMG_DEVICE=cpu
 SD_MODEL=stabilityai/sdxl-turbo
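The new default points at the 7B Qwen2-VL GGUF repository instead of the 2B build. Purely as an illustration of what that value refers to (not ezlocalai's actual loader), a repo ID like this can be resolved against the Hugging Face Hub with huggingface_hub; the Q4_K_M filename pattern below is an assumption:

import logging

from huggingface_hub import snapshot_download

# Assumption: DEFAULT_MODEL is a Hugging Face repo ID whose repo contains GGUF quantizations.
repo_id = "lmstudio-community/Qwen2-VL-7B-Instruct-GGUF"

# Download only one quantization; the "*Q4_K_M*.gguf" pattern is an illustrative choice.
local_dir = snapshot_download(repo_id=repo_id, allow_patterns=["*Q4_K_M*.gguf"])
logging.info(f"GGUF files downloaded to {local_dir}")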
35 changes: 8 additions & 27 deletions Pipes.py
@@ -166,38 +166,19 @@ async def get_response(self, data, completion_type="chat"):
                 else data["prompt"]
             )
             if images:
-                new_messages = [
+                data["messages"][-1]["content"] = [
                     {
-                        "role": "user",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": "Describe each stage of this image.",
-                            },
-                        ],
+                        "type": "text",
+                        "text": user_message,
                     }
                 ]
-                new_messages[0]["content"].extend(images)
+                data["messages"][-1]["content"].extend(images)
+            if completion_type == "chat":
                 try:
-                    image_description = self.llm.chat(messages=new_messages)
-                    print(
-                        f"Image Description: {image_description['choices'][0]['message']['content']}"
-                    )
-                    prompt = (
-                        f"\n\nSee the uploaded image description for any questions about the uploaded image. Act as if you can see the image based on the description. Do not mention 'uploaded image description' in response. Uploaded Image Description: {image_description['choices'][0]['message']['content']}\n\n{data['messages'][-1]['content'][0]['text']}"
-                        if completion_type == "chat"
-                        else f"\n\nSee the uploaded image description for any questions about the uploaded image. Act as if you can see the image based on the description. Do not mention 'uploaded image description' in response. Uploaded Image Description: {image_description['choices'][0]['message']['content']}\n\n{data['prompt']}"
-                    )
-                    print(f"Full Prompt: {prompt}")
-                    if completion_type == "chat":
-                        data["messages"][-1]["content"] = prompt
-                    else:
-                        data["prompt"] = prompt
+                    response = self.llm.chat(**data)
                 except:
-                    logging.warning(f"[VLM] Unable to read image from URL.")
-                    pass
-            if completion_type == "chat":
-                response = self.llm.chat(**data)
+                    data["messages"][-1]["content"] = user_message
+                    response = self.llm.chat(**data)
             else:
                 response = self.llm.completion(**data)
             generated_image = None
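The rewritten block drops the separate image-description pass: the last user message now carries a content list (one text part plus the image parts), and if the loaded model has no vision support and the chat call fails, the handler retries with the plain text message. A minimal standalone sketch of that pattern follows; the helper name chat_with_optional_vision, the generic chat callable standing in for self.llm.chat, and the OpenAI-style image_url part shown in the usage lines are illustrative assumptions, not code from this repo:

import logging


def chat_with_optional_vision(chat, messages, images, user_message):
    """Try a multimodal request first; fall back to text-only if the model has no vision."""
    if images:
        # Content list: one text part followed by the image parts.
        messages[-1]["content"] = [{"type": "text", "text": user_message}]
        messages[-1]["content"].extend(images)
    try:
        return chat(messages=messages)
    except Exception:
        # The model or backend rejected the image content; retry with plain text only.
        logging.warning("[VLM] No vision support detected, falling back to text-only.")
        messages[-1]["content"] = user_message
        return chat(messages=messages)


# Example call, with an assumed OpenAI-style image part:
images = [{"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}]
messages = [{"role": "user", "content": "What is in this picture?"}]
# response = chat_with_optional_vision(llm.chat, messages, images, "What is in this picture?")

In the commit itself the same idea is applied in place on data["messages"], with self.llm.chat(**data) as the call.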
1 change: 0 additions & 1 deletion docker-compose.yml
@@ -9,7 +9,6 @@ services:
       - WHISPER_MODEL=${WHISPER_MODEL-base.en}
       - IMG_ENABLED=${IMG_ENABLED-false}
       - IMG_DEVICE=${IMG_DEVICE-cpu}
-      - VISION_MODEL=${VISION_MODEL}
       - LLM_BATCH_SIZE=${LLM_BATCH_SIZE-1024}
       - SD_MODEL=${SD_MODEL}
     restart: unless-stopped
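With VISION_MODEL removed, the remaining entries still use Compose's ${VAR-default} substitution, which falls back to the default only when the variable is unset. That behaves like Python's os.environ.get; a small sketch of the equivalent lookups (variable names taken from the file above, nothing else assumed):

import os

# ${WHISPER_MODEL-base.en} resolves the same way as this lookup.
whisper_model = os.environ.get("WHISPER_MODEL", "base.en")
img_enabled = os.environ.get("IMG_ENABLED", "false")
llm_batch_size = int(os.environ.get("LLM_BATCH_SIZE", "1024"))
print(whisper_model, img_enabled, llm_batch_size)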
