From 0d2c0ed919a925adf1dea1d572d56cfa3754dacc Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Mon, 3 Mar 2025 22:28:54 +0530 Subject: [PATCH 01/12] feat: googleai llm addition Signed-off-by: royalpinto007 --- backend/director/constants.py | 2 + backend/director/llm/__init__.py | 4 + backend/director/llm/googleai.py | 196 +++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 backend/director/llm/googleai.py diff --git a/backend/director/constants.py b/backend/director/constants.py index f370aac9..a398d134 100644 --- a/backend/director/constants.py +++ b/backend/director/constants.py @@ -19,6 +19,7 @@ class LLMType(str, Enum): OPENAI = "openai" ANTHROPIC = "anthropic" + GOOGLEAI = "googleai" VIDEODB_PROXY = "videodb_proxy" @@ -27,5 +28,6 @@ class EnvPrefix(str, Enum): OPENAI_ = "OPENAI_" ANTHROPIC_ = "ANTHROPIC_" + GOOGLEAI_ = "GOOGLEAI_" DOWNLOADS_PATH="director/downloads" diff --git a/backend/director/llm/__init__.py b/backend/director/llm/__init__.py index b909bb3b..71e79c4c 100644 --- a/backend/director/llm/__init__.py +++ b/backend/director/llm/__init__.py @@ -4,6 +4,7 @@ from director.llm.openai import OpenAI from director.llm.anthropic import AnthropicAI +from director.llm.googleai import GoogleAI from director.llm.videodb_proxy import VideoDBProxy @@ -12,6 +13,7 @@ def get_default_llm(): openai = True if os.getenv("OPENAI_API_KEY") else False anthropic = True if os.getenv("ANTHROPIC_API_KEY") else False + googleai = True if os.getenv("GOOGLEAI_API_KEY") else False default_llm = os.getenv("DEFAULT_LLM") @@ -19,5 +21,7 @@ def get_default_llm(): return OpenAI() elif anthropic or default_llm == LLMType.ANTHROPIC: return AnthropicAI() + elif googleai or default_llm == LLMType.GOOGLEAI: + return GoogleAI() else: return VideoDBProxy() diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py new file mode 100644 index 00000000..278544c3 --- /dev/null +++ b/backend/director/llm/googleai.py @@ -0,0 +1,196 @@ +import json +from enum import Enum + +from pydantic import Field, field_validator, FieldValidationInfo +from pydantic_settings import SettingsConfigDict + +from director.core.session import RoleTypes +from director.llm.base import BaseLLM, BaseLLMConfig, LLMResponse, LLMResponseStatus +from director.constants import ( + LLMType, + EnvPrefix, +) + + +class GoogleChatModel(str, Enum): + """Enum for Google Gemini Chat models""" + + GEMINI_15_FLASH = "gemini-1.5-flash" + GEMINI_15_PRO = "gemini-1.5-pro" + GEMINI_15_ULTRA = "gemini-1.5-ultra" + + +class GoogleAIConfig(BaseLLMConfig): + """GoogleAI Config""" + + model_config = SettingsConfigDict( + env_prefix=EnvPrefix.GOOGLEAI_, + extra="ignore", + ) + + llm_type: str = LLMType.GOOGLEAI + api_key: str = "" + api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/" + chat_model: str = Field(default=GoogleChatModel.GEMINI_15_FLASH) + max_tokens: int = 4096 + temperature: float = 0.7 + top_p: float = 1.0 + timeout: int = 30 + + @field_validator("api_key") + @classmethod + def validate_non_empty(cls, v, info: FieldValidationInfo): + if not v: + raise ValueError( + f"{info.field_name} must not be empty. Please set {EnvPrefix.GOOGLEAI_.value}{info.field_name.upper()} environment variable." 
+ ) + return v + + +class GoogleAI(BaseLLM): + def __init__(self, config: GoogleAIConfig = None): + """ + :param config: GoogleAI Config + """ + if config is None: + config = GoogleAIConfig() + super().__init__(config=config) + try: + import openai + except ImportError: + raise ImportError("Please install OpenAI python library.") + + self.client = openai.OpenAI( + api_key=self.config.api_key, base_url=self.config.api_base + ) + + def _format_messages(self, messages: list): + """Format the messages to the format that Google Gemini expects.""" + formatted_messages = [] + + if messages and messages[0]["role"] == RoleTypes.system.value: + messages = messages[1:] + + for message in messages: + message["content"] = message.get("content", "") or "" + + if message["role"] == RoleTypes.assistant.value and message.get( + "tool_calls" + ): + formatted_messages.append( + { + "role": message["role"], + "content": message["content"], + "tool_calls": [ + { + "id": tool_call["id"], + "function": { + "name": tool_call.get("tool", {}).get("name", ""), + "arguments": json.dumps( + tool_call.get("tool", {}).get("arguments", {}) + ), + }, + "type": tool_call["type"], + } + for tool_call in message["tool_calls"] + ], + } + ) + elif message["role"] == RoleTypes.tool.value: + formatted_messages.append( + { + "role": RoleTypes.tool.value, + "content": [ + { + "type": "tool_result", + "tool_use_id": message["tool_call_id"], + "content": message["content"], + } + ], + } + ) + else: + formatted_messages.append(message) + + return formatted_messages + + def _format_tools(self, tools: list): + """Format the tools to the format that Gemini expects.""" + return [ + { + "type": "function", + "function": { + "name": tool.get("name", ""), + "description": tool.get("description", ""), + "parameters": tool.get("parameters", {}), + }, + } + for tool in tools + if tool.get("name") + ] + + def chat_completions( + self, messages: list, tools: list = [], stop=None, response_format=None + ): + """Get completions for chat using Gemini 1.5 Flash.""" + + params = { + "model": self.config.chat_model, + "messages": self._format_messages(messages), + "temperature": self.config.temperature or 0.7, + "max_tokens": self.config.max_tokens or 4096, + "top_p": self.config.top_p or 1.0, + "stop": stop if stop else None, + "timeout": self.config.timeout or 30, + } + + if tools: + params["tools"] = self._format_tools(tools) + params["tool_choice"] = "auto" + + if response_format: + params["response_format"] = response_format + + params = {k: v for k, v in params.items() if v is not None} + + try: + response = self.client.chat.completions.create(**params) + except Exception as e: + print(f"Error: {e}") + return LLMResponse(content=f"Error: {e}") + + content = ( + response.choices[0].message.content + if response.choices and response.choices[0].message.content + else "No response" + ) + + tool_calls = ( + [ + { + "id": tool_call.id, + "tool": { + "name": tool_call.function.name, + "arguments": json.loads(tool_call.function.arguments), + }, + "type": tool_call.type, + } + for tool_call in response.choices[0].message.tool_calls + ] + if response.choices and response.choices[0].message.tool_calls + else [] + ) + + send_tokens = getattr(response.usage, "prompt_tokens", 0) + recv_tokens = getattr(response.usage, "completion_tokens", 0) + total_tokens = getattr(response.usage, "total_tokens", 0) + + return LLMResponse( + content=content, + tool_calls=tool_calls, + finish_reason=response.choices[0].finish_reason if response.choices else "", + 
send_tokens=send_tokens,
+            recv_tokens=recv_tokens,
+            total_tokens=total_tokens,
+            status=LLMResponseStatus.SUCCESS,
+        )

From 98d58105a024964ee2780e5cb8e8049d6e5142ec Mon Sep 17 00:00:00 2001
From: royalpinto007
Date: Wed, 5 Mar 2025 13:53:50 +0530
Subject: [PATCH 02/12] fix: models, unwanted params

Signed-off-by: royalpinto007
---
 backend/director/llm/googleai.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py
index 278544c3..d4498fd1 100644
--- a/backend/director/llm/googleai.py
+++ b/backend/director/llm/googleai.py
@@ -16,8 +16,12 @@ class GoogleChatModel(str, Enum):
     """Enum for Google Gemini Chat models"""
 
     GEMINI_15_FLASH = "gemini-1.5-flash"
+    GEMINI_15_FLASH_002 = "gemini-1.5-flash-002"
     GEMINI_15_PRO = "gemini-1.5-pro"
-    GEMINI_15_ULTRA = "gemini-1.5-ultra"
+    GEMINI_15_PRO_002 = "gemini-1.5-pro-002"
+    GEMINI_20_FLASH = "gemini-2.0-flash"
+    GEMINI_20_FLASH_001 = "gemini-2.0-flash-001"
+    GEMINI_20_PRO = "gemini-2.0-pro-exp"
 
 
 class GoogleAIConfig(BaseLLMConfig):
@@ -31,7 +35,7 @@ class GoogleAIConfig(BaseLLMConfig):
     llm_type: str = LLMType.GOOGLEAI
     api_key: str = ""
     api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
-    chat_model: str = Field(default=GoogleChatModel.GEMINI_15_FLASH)
+    chat_model: str = Field(default=GoogleChatModel.GEMINI_20_FLASH)
     max_tokens: int = 4096
     temperature: float = 0.7
     top_p: float = 1.0
@@ -140,8 +144,6 @@ def chat_completions(
             "temperature": self.config.temperature or 0.7,
             "max_tokens": self.config.max_tokens or 4096,
             "top_p": self.config.top_p or 1.0,
-            "stop": stop if stop else None,
-            "timeout": self.config.timeout or 30,
         }
 
         if tools:

From b22dd3010c726c7e291747febb7ae71734357b9d Mon Sep 17 00:00:00 2001
From: royalpinto007
Date: Wed, 5 Mar 2025 19:33:48 +0530
Subject: [PATCH 03/12] fix: parameters to gemini compatible format

Signed-off-by: royalpinto007
---
 backend/director/agents/base.py | 22 +++++++++++++++++++++-
 backend/director/handler.py     |  4 ++--
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/backend/director/agents/base.py b/backend/director/agents/base.py
index 3a37a42d..a5f467cb 100644
--- a/backend/director/agents/base.py
+++ b/backend/director/agents/base.py
@@ -39,7 +39,27 @@ def get_parameters(self):
             raise Exception(
                 "Failed to infer parameters, please define JSON instead of using this automated util."
) - return parameters + + parameters["properties"].pop("args", None) + parameters["properties"].pop("kwargs", None) + + if "required" in parameters: + parameters["required"] = [ + param for param in parameters["required"] if param not in ["args", "kwargs"] + ] + + required_params = set(parameters.get("required", [])) + parameters["properties"] = { + key: value + for key, value in parameters["properties"].items() + if key in required_params + } + + return { + "type": "object", + "properties": parameters["properties"], + "required": parameters.get("required", []), + } def to_llm_format(self): """Convert the agent to LLM tool format.""" diff --git a/backend/director/handler.py b/backend/director/handler.py index 2dbb64b9..11c381a3 100644 --- a/backend/director/handler.py +++ b/backend/director/handler.py @@ -49,7 +49,7 @@ def __init__(self, db, **kwargs): SummarizeVideoAgent, DownloadAgent, PricingAgent, - UploadAgent, + # UploadAgent, SearchAgent, PromptClipAgent, IndexAgent, @@ -62,7 +62,7 @@ def __init__(self, db, **kwargs): SubtitleAgent, SlackAgent, EditingAgent, - DubbingAgent, + # DubbingAgent, TranscriptionAgent, TextToMovieAgent, MemeMakerAgent, From 5b550be2e15007c71e7ec0049919f7adabea92de Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Thu, 6 Mar 2025 10:59:43 +0530 Subject: [PATCH 04/12] fix: dubbing, upload compatible to gemini Signed-off-by: royalpinto007 --- backend/director/agents/dubbing.py | 8 ++--- backend/director/agents/upload.py | 58 +++++++++++++++--------------- backend/director/handler.py | 4 +-- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/backend/director/agents/dubbing.py b/backend/director/agents/dubbing.py index 4c2f75ec..1d4310af 100644 --- a/backend/director/agents/dubbing.py +++ b/backend/director/agents/dubbing.py @@ -35,10 +35,10 @@ "description": "The dubbing engine to use. Default is 'elevenlabs'. 
Possible values include 'elevenlabs'.", "default": "elevenlabs", }, - "engine_params": { - "type": "object", - "description": "Optional parameters for the dubbing engine.", - }, + # "engine_params": { + # "type": "object", + # "description": "Optional parameters for the dubbing engine.", + # }, }, "required": [ "video_id", diff --git a/backend/director/agents/upload.py b/backend/director/agents/upload.py index 48a41b83..ff3dbcf9 100644 --- a/backend/director/agents/upload.py +++ b/backend/director/agents/upload.py @@ -15,34 +15,34 @@ logger = logging.getLogger(__name__) -UPLOAD_AGENT_PARAMETERS = { - "type": "object", - "properties": { - "source": { - "type": "string", - "description": "URL or local path to upload the content", - }, - "source_type": { - "type": "string", - "description": "Type of given source.", - "enum": ["url", "local_file"], - }, - "name": { - "type": "string", - "description": "Name of the content to upload, optional parameter", - }, - "media_type": { - "type": "string", - "enum": ["video", "audio", "image"], - "description": "Type of media to upload, default is video", - }, - "collection_id": { - "type": "string", - "description": "Collection ID to upload the content", - }, - }, - "required": ["url", "media_type", "collection_id"], -} +# UPLOAD_AGENT_PARAMETERS = { +# "type": "object", +# "properties": { +# "source": { +# "type": "string", +# "description": "URL or local path to upload the content", +# }, +# "source_type": { +# "type": "string", +# "description": "Type of given source.", +# "enum": ["url", "local_file"], +# }, +# "name": { +# "type": "string", +# "description": "Name of the content to upload, optional parameter", +# }, +# "media_type": { +# "type": "string", +# "enum": ["video", "audio", "image"], +# "description": "Type of media to upload, default is video", +# }, +# "collection_id": { +# "type": "string", +# "description": "Collection ID to upload the content", +# }, +# }, +# "required": ["url", "media_type", "collection_id"], +# } class UploadAgent(BaseAgent): @@ -54,7 +54,7 @@ def __init__(self, session: Session, **kwargs): "The media content can be a video, audio, or image file. " "Youtube playlist and links are also supported. 
" ) - self.parameters = UPLOAD_AGENT_PARAMETERS + self.parameters = self.get_parameters() super().__init__(session=session, **kwargs) def _upload(self, source: str, source_type: str, media_type: str, name: str = None): diff --git a/backend/director/handler.py b/backend/director/handler.py index 11c381a3..2dbb64b9 100644 --- a/backend/director/handler.py +++ b/backend/director/handler.py @@ -49,7 +49,7 @@ def __init__(self, db, **kwargs): SummarizeVideoAgent, DownloadAgent, PricingAgent, - # UploadAgent, + UploadAgent, SearchAgent, PromptClipAgent, IndexAgent, @@ -62,7 +62,7 @@ def __init__(self, db, **kwargs): SubtitleAgent, SlackAgent, EditingAgent, - # DubbingAgent, + DubbingAgent, TranscriptionAgent, TextToMovieAgent, MemeMakerAgent, From eacddf499e3b3bb02b6ce554dbd9a03646360268 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Thu, 6 Mar 2025 14:25:59 +0530 Subject: [PATCH 05/12] fix: unwanted dubbing parameter, url upload validation Signed-off-by: royalpinto007 --- backend/director/agents/dubbing.py | 6 --- backend/director/agents/upload.py | 80 +++++++++++++++++++----------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/backend/director/agents/dubbing.py b/backend/director/agents/dubbing.py index 1d4310af..2b03c6cf 100644 --- a/backend/director/agents/dubbing.py +++ b/backend/director/agents/dubbing.py @@ -35,10 +35,6 @@ "description": "The dubbing engine to use. Default is 'elevenlabs'. Possible values include 'elevenlabs'.", "default": "elevenlabs", }, - # "engine_params": { - # "type": "object", - # "description": "Optional parameters for the dubbing engine.", - # }, }, "required": [ "video_id", @@ -66,7 +62,6 @@ def run( target_language_code: str, collection_id: str, engine: str, - engine_params: dict = {}, *args, **kwargs, ) -> AgentResponse: @@ -77,7 +72,6 @@ def run( :param str target_language_code: The target language code for dubbing (e.g. es). :param str collection_id: The ID of the collection to process. :param str engine: The dubbing engine to use. Default is 'elevenlabs'. - :param dict engine_params: Optional parameters for the dubbing engine. :param args: Additional positional arguments. :param kwargs: Additional keyword arguments. :return: The response containing information about the dubbing operation. 
diff --git a/backend/director/agents/upload.py b/backend/director/agents/upload.py index ff3dbcf9..3ea40769 100644 --- a/backend/director/agents/upload.py +++ b/backend/director/agents/upload.py @@ -15,34 +15,34 @@ logger = logging.getLogger(__name__) -# UPLOAD_AGENT_PARAMETERS = { -# "type": "object", -# "properties": { -# "source": { -# "type": "string", -# "description": "URL or local path to upload the content", -# }, -# "source_type": { -# "type": "string", -# "description": "Type of given source.", -# "enum": ["url", "local_file"], -# }, -# "name": { -# "type": "string", -# "description": "Name of the content to upload, optional parameter", -# }, -# "media_type": { -# "type": "string", -# "enum": ["video", "audio", "image"], -# "description": "Type of media to upload, default is video", -# }, -# "collection_id": { -# "type": "string", -# "description": "Collection ID to upload the content", -# }, -# }, -# "required": ["url", "media_type", "collection_id"], -# } +UPLOAD_AGENT_PARAMETERS = { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "URL or local path to upload the content", + }, + "source_type": { + "type": "string", + "description": "Type of given source.", + "enum": ["url", "local_file"], + }, + "name": { + "type": "string", + "description": "Name of the content to upload, optional parameter", + }, + "media_type": { + "type": "string", + "enum": ["video", "audio", "image"], + "description": "Type of media to upload, default is video", + }, + "collection_id": { + "type": "string", + "description": "Collection ID to upload the content", + }, + }, + "required": ["media_type", "collection_id"], +} class UploadAgent(BaseAgent): @@ -54,11 +54,19 @@ def __init__(self, session: Session, **kwargs): "The media content can be a video, audio, or image file. " "Youtube playlist and links are also supported. " ) - self.parameters = self.get_parameters() + self.parameters = UPLOAD_AGENT_PARAMETERS super().__init__(session=session, **kwargs) def _upload(self, source: str, source_type: str, media_type: str, name: str = None): """Upload the media with the given URL.""" + + if not source or not isinstance(source, str) or not source.strip(): + return AgentResponse( + status=AgentStatus.ERROR, + message="Invalid source: A valid URL or local path is required for upload.", + data={}, + ) + try: if media_type == "video": content = VideoContent( @@ -161,6 +169,20 @@ def run( :return: AgentResponse - The response containing information about the upload operation. """ + if not source or not isinstance(source, str) or not source.strip(): + return AgentResponse( + status=AgentStatus.ERROR, + message="Invalid source: A valid URL or local path is required.", + data={}, + ) + + if source_type not in ["url", "local_file"]: + return AgentResponse( + status=AgentStatus.ERROR, + message=f"Invalid source type '{source_type}'. 
Must be 'url' or 'local_file'.", + data={}, + ) + self.videodb_tool = VideoDBTool(collection_id=collection_id) if source_type == "local_file": From ace2d7b35cf674c930e85f3476a5d96e0cf0541c Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Thu, 6 Mar 2025 15:03:08 +0530 Subject: [PATCH 06/12] fix: url with source in required Signed-off-by: royalpinto007 --- backend/director/agents/upload.py | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/backend/director/agents/upload.py b/backend/director/agents/upload.py index 3ea40769..6990c6d3 100644 --- a/backend/director/agents/upload.py +++ b/backend/director/agents/upload.py @@ -41,7 +41,7 @@ "description": "Collection ID to upload the content", }, }, - "required": ["media_type", "collection_id"], + "required": ["source", "media_type", "collection_id"], } @@ -59,14 +59,6 @@ def __init__(self, session: Session, **kwargs): def _upload(self, source: str, source_type: str, media_type: str, name: str = None): """Upload the media with the given URL.""" - - if not source or not isinstance(source, str) or not source.strip(): - return AgentResponse( - status=AgentStatus.ERROR, - message="Invalid source: A valid URL or local path is required for upload.", - data={}, - ) - try: if media_type == "video": content = VideoContent( @@ -169,20 +161,6 @@ def run( :return: AgentResponse - The response containing information about the upload operation. """ - if not source or not isinstance(source, str) or not source.strip(): - return AgentResponse( - status=AgentStatus.ERROR, - message="Invalid source: A valid URL or local path is required.", - data={}, - ) - - if source_type not in ["url", "local_file"]: - return AgentResponse( - status=AgentStatus.ERROR, - message=f"Invalid source type '{source_type}'. 
Must be 'url' or 'local_file'.",
-                data={},
-            )
-
         self.videodb_tool = VideoDBTool(collection_id=collection_id)
 
         if source_type == "local_file":

From 372abb91e2750d5525d34924afca9503ee7b642c Mon Sep 17 00:00:00 2001
From: royalpinto007
Date: Fri, 7 Mar 2025 16:08:50 +0530
Subject: [PATCH 07/12] fix: null type and unnecessary changes

Signed-off-by: royalpinto007
---
 backend/director/agents/base.py     | 17 ++++-------------
 backend/director/agents/download.py |  9 ++++++++-
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/backend/director/agents/base.py b/backend/director/agents/base.py
index a5f467cb..5fc9e3c7 100644
--- a/backend/director/agents/base.py
+++ b/backend/director/agents/base.py
@@ -45,21 +45,12 @@ def get_parameters(self):
 
         if "required" in parameters:
             parameters["required"] = [
-                param for param in parameters["required"] if param not in ["args", "kwargs"]
+                param
+                for param in parameters["required"]
+                if param not in ["args", "kwargs"]
             ]
 
-        required_params = set(parameters.get("required", []))
-        parameters["properties"] = {
-            key: value
-            for key, value in parameters["properties"].items()
-            if key in required_params
-        }
-
-        return {
-            "type": "object",
-            "properties": parameters["properties"],
-            "required": parameters.get("required", []),
-        }
+        return parameters
 
     def to_llm_format(self):
         """Convert the agent to LLM tool format."""
diff --git a/backend/director/agents/download.py b/backend/director/agents/download.py
index 24c531db..01f475d5 100644
--- a/backend/director/agents/download.py
+++ b/backend/director/agents/download.py
@@ -14,7 +14,14 @@ def __init__(self, session: Session, **kwargs):
         self.parameters = self.get_parameters()
         super().__init__(session=session, **kwargs)
 
-    def run(self, stream_link: str, name: str = None, *args, **kwargs) -> AgentResponse:
+    def run(
+        self,
+        stream_link: str,
+        name: str = None,
+        stream_name: str = None,
+        *args,
+        **kwargs,
+    ) -> AgentResponse:
         """
         Downloads the video from the given stream link.
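
With this revision, get_parameters() keeps the full inferred schema and only
strips the *args/**kwargs placeholders. A rough sketch of the intent, for a
hypothetical run(self, video_id: str, name: str = None, *args, **kwargs)
signature (names illustrative, not repo output):

    # inferred schema: properties has video_id, name, args, kwargs
    # after cleanup:   properties has video_id, name; required == ["video_id"]
    # The reverted filtering kept only required properties, which would have
    # silently dropped optional parameters such as "name".
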
From 0875c0c8d5d54e0f602ebbc559d29e9b4fb87c5c Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Fri, 7 Mar 2025 17:05:34 +0530 Subject: [PATCH 08/12] fix: unnecessary variables, env sample Signed-off-by: royalpinto007 --- backend/.env.sample | 1 + backend/director/llm/googleai.py | 62 +++++++++++++------------------- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/backend/.env.sample b/backend/.env.sample index e4251386..e48f9645 100644 --- a/backend/.env.sample +++ b/backend/.env.sample @@ -18,6 +18,7 @@ SQLITE_DB_PATH= # LLM Integrations OPENAI_API_KEY= ANTHROPIC_API_KEY= +GOOGLEAI_API_KEY= # Tools REPLICATE_API_TOKEN= diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index d4498fd1..56231c49 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -15,13 +15,13 @@ class GoogleChatModel(str, Enum): """Enum for Google Gemini Chat models""" - GEMINI_15_FLASH = "gemini-1.5-flash" - GEMINI_15_FLASH_002 = "gemini-1.5-flash-002" - GEMINI_15_PRO = "gemini-1.5-pro" - GEMINI_15_PRO_002 = "gemini-1.5-pro-002" - GEMINI_20_FLASH = "gemini-2.0-flash" - GEMINI_20_FLASH_001 = "gemini-2.0-flash-001" - GEMINI_20_PRO = "gemini-2.0-pro-exp" + GEMINI_1_5_FLASH = "gemini-1.5-flash" + GEMINI_1_5_FLASH_0_0_2 = "gemini-1.5-flash-002" + GEMINI_1_5_PRO = "gemini-1.5-pro" + GEMINI_1_5_PRO_0_0_2 = "gemini-1.5-pro-002" + GEMINI_2_0_FLASH = "gemini-2.0-flash" + GEMINI_2_0_FLASH_0_0_1 = "gemini-2.0-flash-001" + GEMINI_2_0_PRO = "gemini-2.0-pro-exp" class GoogleAIConfig(BaseLLMConfig): @@ -35,11 +35,8 @@ class GoogleAIConfig(BaseLLMConfig): llm_type: str = LLMType.GOOGLEAI api_key: str = "" api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/" - chat_model: str = Field(default=GoogleChatModel.GEMINI_20_FLASH) + chat_model: str = Field(default=GoogleChatModel.GEMINI_2_0_FLASH) max_tokens: int = 4096 - temperature: float = 0.7 - top_p: float = 1.0 - timeout: int = 30 @field_validator("api_key") @classmethod @@ -65,7 +62,7 @@ def __init__(self, config: GoogleAIConfig = None): raise ImportError("Please install OpenAI python library.") self.client = openai.OpenAI( - api_key=self.config.api_key, base_url=self.config.api_base + api_key=self.api_key, base_url=self.api_base ) def _format_messages(self, messages: list): @@ -134,16 +131,15 @@ def _format_tools(self, tools: list): ] def chat_completions( - self, messages: list, tools: list = [], stop=None, response_format=None + self, messages: list, tools: list = [], response_format=None ): - """Get completions for chat using Gemini 1.5 Flash.""" - + """Get chat completions using Gemini.""" params = { - "model": self.config.chat_model, + "model": self.chat_model, "messages": self._format_messages(messages), - "temperature": self.config.temperature or 0.7, - "max_tokens": self.config.max_tokens or 4096, - "top_p": self.config.top_p or 1.0, + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "top_p": self.top_p, } if tools: @@ -153,46 +149,38 @@ def chat_completions( if response_format: params["response_format"] = response_format - params = {k: v for k, v in params.items() if v is not None} - try: response = self.client.chat.completions.create(**params) except Exception as e: print(f"Error: {e}") return LLMResponse(content=f"Error: {e}") + choice = response.choices[0] if response.choices else None content = ( - response.choices[0].message.content - if response.choices and response.choices[0].message.content + choice.message.content + if choice and 
choice.message.content else "No response" ) tool_calls = ( [ { - "id": tool_call.id, + "id": tc.id, "tool": { - "name": tool_call.function.name, - "arguments": json.loads(tool_call.function.arguments), + "name": tc.function.name, + "arguments": json.loads(tc.function.arguments), }, - "type": tool_call.type, + "type": tc.type, } - for tool_call in response.choices[0].message.tool_calls + for tc in choice.message.tool_calls ] - if response.choices and response.choices[0].message.tool_calls + if choice and choice.message.tool_calls else [] ) - send_tokens = getattr(response.usage, "prompt_tokens", 0) - recv_tokens = getattr(response.usage, "completion_tokens", 0) - total_tokens = getattr(response.usage, "total_tokens", 0) - return LLMResponse( content=content, tool_calls=tool_calls, - finish_reason=response.choices[0].finish_reason if response.choices else "", - send_tokens=send_tokens, - recv_tokens=recv_tokens, - total_tokens=total_tokens, + finish_reason=choice.finish_reason if choice else "", status=LLMResponseStatus.SUCCESS, ) From ddf38ae14034b4be12ee047e4e5306e20c4e3bc4 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Fri, 7 Mar 2025 18:55:58 +0530 Subject: [PATCH 09/12] fix: openai structure, docs Signed-off-by: royalpinto007 --- backend/director/llm/googleai.py | 101 +++++++++++++++---------------- docs/llm/googleai.md | 15 +++++ 2 files changed, 63 insertions(+), 53 deletions(-) create mode 100644 docs/llm/googleai.md diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 56231c49..5a8d54d6 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -4,7 +4,7 @@ from pydantic import Field, field_validator, FieldValidationInfo from pydantic_settings import SettingsConfigDict -from director.core.session import RoleTypes + from director.llm.base import BaseLLM, BaseLLMConfig, LLMResponse, LLMResponseStatus from director.constants import ( LLMType, @@ -69,15 +69,8 @@ def _format_messages(self, messages: list): """Format the messages to the format that Google Gemini expects.""" formatted_messages = [] - if messages and messages[0]["role"] == RoleTypes.system.value: - messages = messages[1:] - for message in messages: - message["content"] = message.get("content", "") or "" - - if message["role"] == RoleTypes.assistant.value and message.get( - "tool_calls" - ): + if message["role"] == "assistant" and message.get("tool_calls"): formatted_messages.append( { "role": message["role"], @@ -86,10 +79,8 @@ def _format_messages(self, messages: list): { "id": tool_call["id"], "function": { - "name": tool_call.get("tool", {}).get("name", ""), - "arguments": json.dumps( - tool_call.get("tool", {}).get("arguments", {}) - ), + "name": tool_call["tool"]["name"], + "arguments": json.dumps(tool_call["tool"]["arguments"]), }, "type": tool_call["type"], } @@ -97,26 +88,40 @@ def _format_messages(self, messages: list): ], } ) - elif message["role"] == RoleTypes.tool.value: - formatted_messages.append( - { - "role": RoleTypes.tool.value, - "content": [ - { - "type": "tool_result", - "tool_use_id": message["tool_call_id"], - "content": message["content"], - } - ], - } - ) else: formatted_messages.append(message) return formatted_messages def _format_tools(self, tools: list): - """Format the tools to the format that Gemini expects.""" + """Format the tools to the format that Gemini expects. 
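+        Tools missing a "name" are skipped rather than forwarded.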
+ + **Example**:: + + [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. Chicago, IL" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ] + """ return [ { "type": "function", @@ -130,10 +135,11 @@ def _format_tools(self, tools: list): if tool.get("name") ] - def chat_completions( - self, messages: list, tools: list = [], response_format=None - ): - """Get chat completions using Gemini.""" + def chat_completions(self, messages: list, tools: list = [], response_format=None): + """Get chat completions using Gemini. + + docs: https://ai.google.dev/gemini-api/docs/openai + """ params = { "model": self.chat_model, "messages": self._format_messages(messages), @@ -155,32 +161,21 @@ def chat_completions( print(f"Error: {e}") return LLMResponse(content=f"Error: {e}") - choice = response.choices[0] if response.choices else None - content = ( - choice.message.content - if choice and choice.message.content - else "No response" - ) - - tool_calls = ( - [ + return LLMResponse( + content=response.choices[0].message.content or "", + tool_calls=[ { - "id": tc.id, + "id": tool_call.id, "tool": { - "name": tc.function.name, - "arguments": json.loads(tc.function.arguments), + "name": tool_call.function.name, + "arguments": json.loads(tool_call.function.arguments), }, - "type": tc.type, + "type": tool_call.type, } - for tc in choice.message.tool_calls + for tool_call in response.choices[0].message.tool_calls ] - if choice and choice.message.tool_calls - else [] - ) - - return LLMResponse( - content=content, - tool_calls=tool_calls, - finish_reason=choice.finish_reason if choice else "", + if response.choices[0].message.tool_calls + else [], + finish_reason=response.choices[0].finish_reason, status=LLMResponseStatus.SUCCESS, ) diff --git a/docs/llm/googleai.md b/docs/llm/googleai.md new file mode 100644 index 00000000..9cbe75eb --- /dev/null +++ b/docs/llm/googleai.md @@ -0,0 +1,15 @@ +## GoogleAI + +GoogleAI extends the Base LLM and implements the Google Gemini API. + +### GoogleAI Config + +GoogleAI Config is the configuration object for Google Gemini. It is used to configure Google Gemini and is passed to GoogleAI when it is created. + +::: director.llm.googleai.GoogleAIConfig + +### GoogleAI Interface + +GoogleAI is the LLM used by the agents and tools. It is used to generate responses to messages. + +::: director.llm.googleai.GoogleAI From d07aefa9c4558537160d7f0178ed882b14f12e48 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Fri, 7 Mar 2025 19:38:54 +0530 Subject: [PATCH 10/12] fix: basellm class, docs Signed-off-by: royalpinto007 --- backend/director/llm/googleai.py | 8 +++++++- mkdocs.yml | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 5a8d54d6..41c658bf 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -135,7 +135,9 @@ def _format_tools(self, tools: list): if tool.get("name") ] - def chat_completions(self, messages: list, tools: list = [], response_format=None): + def chat_completions( + self, messages: list, tools: list = [], stop=None, response_format=None + ): """Get chat completions using Gemini. 
docs: https://ai.google.dev/gemini-api/docs/openai @@ -146,6 +148,7 @@ def chat_completions(self, messages: list, tools: list = [], response_format=Non "temperature": self.temperature, "max_tokens": self.max_tokens, "top_p": self.top_p, + "timeout": self.timeout, } if tools: @@ -177,5 +180,8 @@ def chat_completions(self, messages: list, tools: list = [], response_format=Non if response.choices[0].message.tool_calls else [], finish_reason=response.choices[0].finish_reason, + send_tokens=response.usage.prompt_tokens, + recv_tokens=response.usage.completion_tokens, + total_tokens=response.usage.total_tokens, status=LLMResponseStatus.SUCCESS, ) diff --git a/mkdocs.yml b/mkdocs.yml index 196ad5e9..d7ef08b1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -74,6 +74,7 @@ nav: - Integrations: - 'OpenAI': 'llm/openai.md' - 'AnthropicAI': 'llm/anthropic.md' + - 'GoogleAI': 'llm/googleai.md' - 'Database': - 'Interface': 'database/interface.md' - Integrations: From bb3c63cc198253f4c5dc49fbd70b450cfce23b57 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Mon, 10 Mar 2025 14:14:51 +0530 Subject: [PATCH 11/12] fix: reasoning error Signed-off-by: royalpinto007 --- backend/director/llm/googleai.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 41c658bf..0d519721 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -74,13 +74,17 @@ def _format_messages(self, messages: list): formatted_messages.append( { "role": message["role"], - "content": message["content"], + "content": message["content"] + if message["content"] + else "[Processing request...]", "tool_calls": [ { "id": tool_call["id"], "function": { "name": tool_call["tool"]["name"], - "arguments": json.dumps(tool_call["tool"]["arguments"]), + "arguments": json.dumps( + tool_call["tool"]["arguments"] + ), }, "type": tool_call["type"], } From b3df15837b64fbc8ffacdeef80816191185732ca Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Mon, 10 Mar 2025 21:10:33 +0530 Subject: [PATCH 12/12] fix: assistant content Signed-off-by: royalpinto007 --- backend/director/core/session.py | 2 ++ backend/director/llm/googleai.py | 9 ++------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/backend/director/core/session.py b/backend/director/core/session.py index 49f1d875..cbcdaf24 100644 --- a/backend/director/core/session.py +++ b/backend/director/core/session.py @@ -293,6 +293,8 @@ def to_llm_msg(self): if self.role == RoleTypes.assistant: if self.tool_calls: msg["tool_calls"] = self.tool_calls + if not self.content: + msg["content"] = [] return msg if self.role == RoleTypes.tool: diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 0d519721..78e69a64 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -19,9 +19,6 @@ class GoogleChatModel(str, Enum): GEMINI_1_5_FLASH_0_0_2 = "gemini-1.5-flash-002" GEMINI_1_5_PRO = "gemini-1.5-pro" GEMINI_1_5_PRO_0_0_2 = "gemini-1.5-pro-002" - GEMINI_2_0_FLASH = "gemini-2.0-flash" - GEMINI_2_0_FLASH_0_0_1 = "gemini-2.0-flash-001" - GEMINI_2_0_PRO = "gemini-2.0-pro-exp" class GoogleAIConfig(BaseLLMConfig): @@ -35,7 +32,7 @@ class GoogleAIConfig(BaseLLMConfig): llm_type: str = LLMType.GOOGLEAI api_key: str = "" api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/" - chat_model: str = Field(default=GoogleChatModel.GEMINI_2_0_FLASH) + chat_model: str = Field(default=GoogleChatModel.GEMINI_1_5_FLASH) 
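+    # With env_prefix "GOOGLEAI_", pydantic-settings should also let a
+    # GOOGLEAI_CHAT_MODEL environment variable override this default.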
max_tokens: int = 4096 @field_validator("api_key") @@ -74,9 +71,7 @@ def _format_messages(self, messages: list): formatted_messages.append( { "role": message["role"], - "content": message["content"] - if message["content"] - else "[Processing request...]", + "content": message["content"], "tool_calls": [ { "id": tool_call["id"],
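
With the series applied, Gemini becomes a selectable backend. A minimal usage
sketch (not part of the patches; get_default_llm() checks the OpenAI and
Anthropic keys first, so this assumes only the Gemini key is exported):

    import os

    os.environ["GOOGLEAI_API_KEY"] = "<your-gemini-api-key>"  # placeholder value

    from director.llm import get_default_llm

    llm = get_default_llm()  # resolves to GoogleAI when only this key is set
    response = llm.chat_completions(
        messages=[{"role": "user", "content": "Reply with one word: ready?"}]
    )
    print(response.content, response.status)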