Improvement to pipeline

DevXT-LLC · Apr 2, 2024 · 15bf1ea · 15bf1ea
1 parent dea8754
commit 15bf1ea
Showing 1 changed file with 23 additions and 36 deletions.
diff --git a/Pipes.py b/Pipes.py
@@ -31,7 +31,7 @@ def __init__(self):
                 logging.error(f"[VLM] Failed to load the model: {e}")
                 self.vlm = None
         if self.vlm is not None:
-            logging.info(f"[ezlocalai] Vision is enabled.")
+            logging.info(f"[ezlocalai] Vision is enabled with {self.current_vlm}.")
         self.img_enabled = os.getenv("IMG_ENABLED", "false").lower() == "true"
         self.img = None
         if self.img_enabled and img_import_success:
@@ -73,7 +73,7 @@ def __init__(self):
 
     async def get_response(self, data, completion_type="chat"):
         data["local_uri"] = self.local_uri
-        images_uploaded = False
+        images = []
         if "messages" in data:
             if isinstance(data["messages"][-1]["content"], list):
                 messages = data["messages"][-1]["content"]
@@ -82,7 +82,7 @@ async def get_response(self, data, completion_type="chat"):
                         prompt = message["text"]
                 for message in messages:
                     if "image_url" in message:
-                        images_uploaded = True
+                        images.append(message)
                     if "audio_url" in message:
                         audio_url = (
                             message["audio_url"]["url"]
@@ -126,42 +126,29 @@ async def get_response(self, data, completion_type="chat"):
             if completion_type == "chat"
             else data["prompt"]
         )
-        image_urls = []
-        if isinstance(user_message, list):
-            user_message = prompt
-            for message in messages:
-                if "image_url" in message:
-                    if "url" in message["image_url"]:
-                        image_urls.append(
-                            {
-                                "type": "image_url",
-                                "image_url": {"url": message["image_url"]["url"]},
-                            }
-                        )
-        if self.vlm and image_urls:
-            image_description = self.vlm.chat(
-                messages=[
-                    {
-                        "role": "user",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": "Describe each stage of this image.",
-                            },
-                            *image_urls,
-                        ],
-                    }
-                ],
-                max_tokens=1024,
-                temperature=data["temperature"],
-                top_p=data["top_p"],
+        if self.vlm and images:
+            new_messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "Describe each stage of this image.",
+                        },
+                    ],
+                }
+            ]
+            new_messages[0]["content"].extend(images)
+            image_description = self.vlm.chat(messages=new_messages)
+            print(
+                f"Image Description: {image_description['choices'][0]['message']['content']}"
             )
-            # Add the image description to the prompt
             prompt = (
-                f"\n\nUploaded Image Description: {image_description['choices'][0]['message']['content']} {data['messages'][-1]['content']}"
+                f"\n\nReference the uploaded image description for any questions about the uploaded image. Act as if you can see it. Uploaded Image Description: {image_description['choices'][0]['message']['content']} {data['messages'][-1]['content'][0]['text']}"
                 if completion_type == "chat"
-                else f"\n\nUploaded Image Description: {image_description['choices'][0]['message']['content']} {data['prompt']}"
+                else f"\n\nReference the uploaded image description for any questions about the uploaded image. Act as if you can see it. Uploaded Image Description: {image_description['choices'][0]['message']['content']} {data['prompt']}"
             )
+            print(f"Full Prompt: {prompt}")
             if completion_type == "chat":
                 data["messages"][-1]["content"] = prompt
             else:
@@ -201,7 +188,7 @@ async def get_response(self, data, completion_type="chat"):
                 user_message = user_message.replace(
                     user_message.split("data:")[1].split("'")[0], ""
                 )
-            img_gen_prompt = f"Users message: {user_message} \n\n{'The user uploaded an image, one does not need generated unless the user is specifically asking.' if images_uploaded else ''} **The assistant is acting as sentiment analysis expert and only responds with a concise YES or NO answer on if the user would like an image as visual or a picture generated. No other explanation is needed!**\nWould the user potentially like an image generated based on their message?\nAssistant: "
+            img_gen_prompt = f"Users message: {user_message} \n\n{'The user uploaded an image, one does not need generated unless the user is specifically asking.' if images else ''} **The assistant is acting as sentiment analysis expert and only responds with a concise YES or NO answer on if the user would like an image as visual or a picture generated. No other explanation is needed!**\nWould the user potentially like an image generated based on their message?\nAssistant: "
             logging.info(f"[IMG] Decision maker prompt: {img_gen_prompt}")
             create_img = self.llm.chat(
                 messages=[{"role": "system", "content": img_gen_prompt}],