From 0d2c0ed919a925adf1dea1d572d56cfa3754dacc Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Mon, 3 Mar 2025 22:28:54 +0530 Subject: [PATCH 01/12] feat: googleai llm addition Signed-off-by: royalpinto007 --- backend/director/constants.py | 2 + backend/director/llm/__init__.py | 4 + backend/director/llm/googleai.py | 196 +++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 backend/director/llm/googleai.py diff --git a/backend/director/constants.py b/backend/director/constants.py index f370aac9..a398d134 100644 --- a/backend/director/constants.py +++ b/backend/director/constants.py @@ -19,6 +19,7 @@ class LLMType(str, Enum): OPENAI = "openai" ANTHROPIC = "anthropic" + GOOGLEAI = "googleai" VIDEODB_PROXY = "videodb_proxy" @@ -27,5 +28,6 @@ class EnvPrefix(str, Enum): OPENAI_ = "OPENAI_" ANTHROPIC_ = "ANTHROPIC_" + GOOGLEAI_ = "GOOGLEAI_" DOWNLOADS_PATH="director/downloads" diff --git a/backend/director/llm/__init__.py b/backend/director/llm/__init__.py index b909bb3b..71e79c4c 100644 --- a/backend/director/llm/__init__.py +++ b/backend/director/llm/__init__.py @@ -4,6 +4,7 @@ from director.llm.openai import OpenAI from director.llm.anthropic import AnthropicAI +from director.llm.googleai import GoogleAI from director.llm.videodb_proxy import VideoDBProxy @@ -12,6 +13,7 @@ def get_default_llm(): openai = True if os.getenv("OPENAI_API_KEY") else False anthropic = True if os.getenv("ANTHROPIC_API_KEY") else False + googleai = True if os.getenv("GOOGLEAI_API_KEY") else False default_llm = os.getenv("DEFAULT_LLM") @@ -19,5 +21,7 @@ def get_default_llm(): return OpenAI() elif anthropic or default_llm == LLMType.ANTHROPIC: return AnthropicAI() + elif googleai or default_llm == LLMType.GOOGLEAI: + return GoogleAI() else: return VideoDBProxy() diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py new file mode 100644 index 00000000..278544c3 --- /dev/null +++ b/backend/director/llm/googleai.py @@ -0,0 +1,196 @@ +import json +from enum import Enum + +from pydantic import Field, field_validator, FieldValidationInfo +from pydantic_settings import SettingsConfigDict + +from director.core.session import RoleTypes +from director.llm.base import BaseLLM, BaseLLMConfig, LLMResponse, LLMResponseStatus +from director.constants import ( + LLMType, + EnvPrefix, +) + + +class GoogleChatModel(str, Enum): + """Enum for Google Gemini Chat models""" + + GEMINI_15_FLASH = "gemini-1.5-flash" + GEMINI_15_PRO = "gemini-1.5-pro" + GEMINI_15_ULTRA = "gemini-1.5-ultra" + + +class GoogleAIConfig(BaseLLMConfig): + """GoogleAI Config""" + + model_config = SettingsConfigDict( + env_prefix=EnvPrefix.GOOGLEAI_, + extra="ignore", + ) + + llm_type: str = LLMType.GOOGLEAI + api_key: str = "" + api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/" + chat_model: str = Field(default=GoogleChatModel.GEMINI_15_FLASH) + max_tokens: int = 4096 + temperature: float = 0.7 + top_p: float = 1.0 + timeout: int = 30 + + @field_validator("api_key") + @classmethod + def validate_non_empty(cls, v, info: FieldValidationInfo): + if not v: + raise ValueError( + f"{info.field_name} must not be empty. Please set {EnvPrefix.GOOGLEAI_.value}{info.field_name.upper()} environment variable." 
+ ) + return v + + +class GoogleAI(BaseLLM): + def __init__(self, config: GoogleAIConfig = None): + """ + :param config: GoogleAI Config + """ + if config is None: + config = GoogleAIConfig() + super().__init__(config=config) + try: + import openai + except ImportError: + raise ImportError("Please install OpenAI python library.") + + self.client = openai.OpenAI( + api_key=self.config.api_key, base_url=self.config.api_base + ) + + def _format_messages(self, messages: list): + """Format the messages to the format that Google Gemini expects.""" + formatted_messages = [] + + if messages and messages[0]["role"] == RoleTypes.system.value: + messages = messages[1:] + + for message in messages: + message["content"] = message.get("content", "") or "" + + if message["role"] == RoleTypes.assistant.value and message.get( + "tool_calls" + ): + formatted_messages.append( + { + "role": message["role"], + "content": message["content"], + "tool_calls": [ + { + "id": tool_call["id"], + "function": { + "name": tool_call.get("tool", {}).get("name", ""), + "arguments": json.dumps( + tool_call.get("tool", {}).get("arguments", {}) + ), + }, + "type": tool_call["type"], + } + for tool_call in message["tool_calls"] + ], + } + ) + elif message["role"] == RoleTypes.tool.value: + formatted_messages.append( + { + "role": RoleTypes.tool.value, + "content": [ + { + "type": "tool_result", + "tool_use_id": message["tool_call_id"], + "content": message["content"], + } + ], + } + ) + else: + formatted_messages.append(message) + + return formatted_messages + + def _format_tools(self, tools: list): + """Format the tools to the format that Gemini expects.""" + return [ + { + "type": "function", + "function": { + "name": tool.get("name", ""), + "description": tool.get("description", ""), + "parameters": tool.get("parameters", {}), + }, + } + for tool in tools + if tool.get("name") + ] + + def chat_completions( + self, messages: list, tools: list = [], stop=None, response_format=None + ): + """Get completions for chat using Gemini 1.5 Flash.""" + + params = { + "model": self.config.chat_model, + "messages": self._format_messages(messages), + "temperature": self.config.temperature or 0.7, + "max_tokens": self.config.max_tokens or 4096, + "top_p": self.config.top_p or 1.0, + "stop": stop if stop else None, + "timeout": self.config.timeout or 30, + } + + if tools: + params["tools"] = self._format_tools(tools) + params["tool_choice"] = "auto" + + if response_format: + params["response_format"] = response_format + + params = {k: v for k, v in params.items() if v is not None} + + try: + response = self.client.chat.completions.create(**params) + except Exception as e: + print(f"Error: {e}") + return LLMResponse(content=f"Error: {e}") + + content = ( + response.choices[0].message.content + if response.choices and response.choices[0].message.content + else "No response" + ) + + tool_calls = ( + [ + { + "id": tool_call.id, + "tool": { + "name": tool_call.function.name, + "arguments": json.loads(tool_call.function.arguments), + }, + "type": tool_call.type, + } + for tool_call in response.choices[0].message.tool_calls + ] + if response.choices and response.choices[0].message.tool_calls + else [] + ) + + send_tokens = getattr(response.usage, "prompt_tokens", 0) + recv_tokens = getattr(response.usage, "completion_tokens", 0) + total_tokens = getattr(response.usage, "total_tokens", 0) + + return LLMResponse( + content=content, + tool_calls=tool_calls, + finish_reason=response.choices[0].finish_reason if response.choices else "", + 
send_tokens=send_tokens,
+            recv_tokens=recv_tokens,
+            total_tokens=total_tokens,
+            status=LLMResponseStatus.SUCCESS,
+        )

From 98d58105a024964ee2780e5cb8e8049d6e5142ec Mon Sep 17 00:00:00 2001
From: royalpinto007
Date: Wed, 5 Mar 2025 13:53:50 +0530
Subject: [PATCH 02/12] fix: models, unwanted params

Signed-off-by: royalpinto007
---
 backend/director/llm/googleai.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py
index 278544c3..d4498fd1 100644
--- a/backend/director/llm/googleai.py
+++ b/backend/director/llm/googleai.py
@@ -16,8 +16,12 @@ class GoogleChatModel(str, Enum):
     """Enum for Google Gemini Chat models"""
 
     GEMINI_15_FLASH = "gemini-1.5-flash"
+    GEMINI_15_FLASH_002 = "gemini-1.5-flash-002"
     GEMINI_15_PRO = "gemini-1.5-pro"
-    GEMINI_15_ULTRA = "gemini-1.5-ultra"
+    GEMINI_15_PRO_002 = "gemini-1.5-pro-002"
+    GEMINI_20_FLASH = "gemini-2.0-flash"
+    GEMINI_20_FLASH_001 = "gemini-2.0-flash-001"
+    GEMINI_20_PRO = "gemini-2.0-pro-exp"
 
 
 class GoogleAIConfig(BaseLLMConfig):
@@ -31,7 +35,7 @@ class GoogleAIConfig(BaseLLMConfig):
     llm_type: str = LLMType.GOOGLEAI
     api_key: str = ""
     api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
-    chat_model: str = Field(default=GoogleChatModel.GEMINI_15_FLASH)
+    chat_model: str = Field(default=GoogleChatModel.GEMINI_20_FLASH)
     max_tokens: int = 4096
     temperature: float = 0.7
     top_p: float = 1.0
@@ -140,8 +144,6 @@ def chat_completions(
             "temperature": self.config.temperature or 0.7,
             "max_tokens": self.config.max_tokens or 4096,
             "top_p": self.config.top_p or 1.0,
-            "stop": stop if stop else None,
-            "timeout": self.config.timeout or 30,
         }
 
         if tools:

From b22dd3010c726c7e291747febb7ae71734357b9d Mon Sep 17 00:00:00 2001
From: royalpinto007
Date: Wed, 5 Mar 2025 19:33:48 +0530
Subject: [PATCH 03/12] fix: parameters to gemini compatible format

Signed-off-by: royalpinto007
---
 backend/director/agents/base.py | 22 +++++++++++++++++++++-
 backend/director/handler.py     |  4 ++--
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/backend/director/agents/base.py b/backend/director/agents/base.py
index 3a37a42d..a5f467cb 100644
--- a/backend/director/agents/base.py
+++ b/backend/director/agents/base.py
@@ -39,7 +39,27 @@ def get_parameters(self):
             raise Exception(
                 "Failed to infer parameters, please define JSON instead of using this automated util."
) - return parameters + + parameters["properties"].pop("args", None) + parameters["properties"].pop("kwargs", None) + + if "required" in parameters: + parameters["required"] = [ + param for param in parameters["required"] if param not in ["args", "kwargs"] + ] + + required_params = set(parameters.get("required", [])) + parameters["properties"] = { + key: value + for key, value in parameters["properties"].items() + if key in required_params + } + + return { + "type": "object", + "properties": parameters["properties"], + "required": parameters.get("required", []), + } def to_llm_format(self): """Convert the agent to LLM tool format.""" diff --git a/backend/director/handler.py b/backend/director/handler.py index 2dbb64b9..11c381a3 100644 --- a/backend/director/handler.py +++ b/backend/director/handler.py @@ -49,7 +49,7 @@ def __init__(self, db, **kwargs): SummarizeVideoAgent, DownloadAgent, PricingAgent, - UploadAgent, + # UploadAgent, SearchAgent, PromptClipAgent, IndexAgent, @@ -62,7 +62,7 @@ def __init__(self, db, **kwargs): SubtitleAgent, SlackAgent, EditingAgent, - DubbingAgent, + # DubbingAgent, TranscriptionAgent, TextToMovieAgent, MemeMakerAgent, From 5b550be2e15007c71e7ec0049919f7adabea92de Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Thu, 6 Mar 2025 10:59:43 +0530 Subject: [PATCH 04/12] fix: dubbing, upload compatible to gemini Signed-off-by: royalpinto007 --- backend/director/agents/dubbing.py | 8 ++--- backend/director/agents/upload.py | 58 +++++++++++++++--------------- backend/director/handler.py | 4 +-- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/backend/director/agents/dubbing.py b/backend/director/agents/dubbing.py index 4c2f75ec..1d4310af 100644 --- a/backend/director/agents/dubbing.py +++ b/backend/director/agents/dubbing.py @@ -35,10 +35,10 @@ "description": "The dubbing engine to use. Default is 'elevenlabs'. 
Possible values include 'elevenlabs'.", "default": "elevenlabs", }, - "engine_params": { - "type": "object", - "description": "Optional parameters for the dubbing engine.", - }, + # "engine_params": { + # "type": "object", + # "description": "Optional parameters for the dubbing engine.", + # }, }, "required": [ "video_id", diff --git a/backend/director/agents/upload.py b/backend/director/agents/upload.py index 48a41b83..ff3dbcf9 100644 --- a/backend/director/agents/upload.py +++ b/backend/director/agents/upload.py @@ -15,34 +15,34 @@ logger = logging.getLogger(__name__) -UPLOAD_AGENT_PARAMETERS = { - "type": "object", - "properties": { - "source": { - "type": "string", - "description": "URL or local path to upload the content", - }, - "source_type": { - "type": "string", - "description": "Type of given source.", - "enum": ["url", "local_file"], - }, - "name": { - "type": "string", - "description": "Name of the content to upload, optional parameter", - }, - "media_type": { - "type": "string", - "enum": ["video", "audio", "image"], - "description": "Type of media to upload, default is video", - }, - "collection_id": { - "type": "string", - "description": "Collection ID to upload the content", - }, - }, - "required": ["url", "media_type", "collection_id"], -} +# UPLOAD_AGENT_PARAMETERS = { +# "type": "object", +# "properties": { +# "source": { +# "type": "string", +# "description": "URL or local path to upload the content", +# }, +# "source_type": { +# "type": "string", +# "description": "Type of given source.", +# "enum": ["url", "local_file"], +# }, +# "name": { +# "type": "string", +# "description": "Name of the content to upload, optional parameter", +# }, +# "media_type": { +# "type": "string", +# "enum": ["video", "audio", "image"], +# "description": "Type of media to upload, default is video", +# }, +# "collection_id": { +# "type": "string", +# "description": "Collection ID to upload the content", +# }, +# }, +# "required": ["url", "media_type", "collection_id"], +# } class UploadAgent(BaseAgent): @@ -54,7 +54,7 @@ def __init__(self, session: Session, **kwargs): "The media content can be a video, audio, or image file. " "Youtube playlist and links are also supported. 
" ) - self.parameters = UPLOAD_AGENT_PARAMETERS + self.parameters = self.get_parameters() super().__init__(session=session, **kwargs) def _upload(self, source: str, source_type: str, media_type: str, name: str = None): diff --git a/backend/director/handler.py b/backend/director/handler.py index 11c381a3..2dbb64b9 100644 --- a/backend/director/handler.py +++ b/backend/director/handler.py @@ -49,7 +49,7 @@ def __init__(self, db, **kwargs): SummarizeVideoAgent, DownloadAgent, PricingAgent, - # UploadAgent, + UploadAgent, SearchAgent, PromptClipAgent, IndexAgent, @@ -62,7 +62,7 @@ def __init__(self, db, **kwargs): SubtitleAgent, SlackAgent, EditingAgent, - # DubbingAgent, + DubbingAgent, TranscriptionAgent, TextToMovieAgent, MemeMakerAgent, From eacddf499e3b3bb02b6ce554dbd9a03646360268 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Thu, 6 Mar 2025 14:25:59 +0530 Subject: [PATCH 05/12] fix: unwanted dubbing parameter, url upload validation Signed-off-by: royalpinto007 --- backend/director/agents/dubbing.py | 6 --- backend/director/agents/upload.py | 80 +++++++++++++++++++----------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/backend/director/agents/dubbing.py b/backend/director/agents/dubbing.py index 1d4310af..2b03c6cf 100644 --- a/backend/director/agents/dubbing.py +++ b/backend/director/agents/dubbing.py @@ -35,10 +35,6 @@ "description": "The dubbing engine to use. Default is 'elevenlabs'. Possible values include 'elevenlabs'.", "default": "elevenlabs", }, - # "engine_params": { - # "type": "object", - # "description": "Optional parameters for the dubbing engine.", - # }, }, "required": [ "video_id", @@ -66,7 +62,6 @@ def run( target_language_code: str, collection_id: str, engine: str, - engine_params: dict = {}, *args, **kwargs, ) -> AgentResponse: @@ -77,7 +72,6 @@ def run( :param str target_language_code: The target language code for dubbing (e.g. es). :param str collection_id: The ID of the collection to process. :param str engine: The dubbing engine to use. Default is 'elevenlabs'. - :param dict engine_params: Optional parameters for the dubbing engine. :param args: Additional positional arguments. :param kwargs: Additional keyword arguments. :return: The response containing information about the dubbing operation. 
diff --git a/backend/director/agents/upload.py b/backend/director/agents/upload.py index ff3dbcf9..3ea40769 100644 --- a/backend/director/agents/upload.py +++ b/backend/director/agents/upload.py @@ -15,34 +15,34 @@ logger = logging.getLogger(__name__) -# UPLOAD_AGENT_PARAMETERS = { -# "type": "object", -# "properties": { -# "source": { -# "type": "string", -# "description": "URL or local path to upload the content", -# }, -# "source_type": { -# "type": "string", -# "description": "Type of given source.", -# "enum": ["url", "local_file"], -# }, -# "name": { -# "type": "string", -# "description": "Name of the content to upload, optional parameter", -# }, -# "media_type": { -# "type": "string", -# "enum": ["video", "audio", "image"], -# "description": "Type of media to upload, default is video", -# }, -# "collection_id": { -# "type": "string", -# "description": "Collection ID to upload the content", -# }, -# }, -# "required": ["url", "media_type", "collection_id"], -# } +UPLOAD_AGENT_PARAMETERS = { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "URL or local path to upload the content", + }, + "source_type": { + "type": "string", + "description": "Type of given source.", + "enum": ["url", "local_file"], + }, + "name": { + "type": "string", + "description": "Name of the content to upload, optional parameter", + }, + "media_type": { + "type": "string", + "enum": ["video", "audio", "image"], + "description": "Type of media to upload, default is video", + }, + "collection_id": { + "type": "string", + "description": "Collection ID to upload the content", + }, + }, + "required": ["media_type", "collection_id"], +} class UploadAgent(BaseAgent): @@ -54,11 +54,19 @@ def __init__(self, session: Session, **kwargs): "The media content can be a video, audio, or image file. " "Youtube playlist and links are also supported. " ) - self.parameters = self.get_parameters() + self.parameters = UPLOAD_AGENT_PARAMETERS super().__init__(session=session, **kwargs) def _upload(self, source: str, source_type: str, media_type: str, name: str = None): """Upload the media with the given URL.""" + + if not source or not isinstance(source, str) or not source.strip(): + return AgentResponse( + status=AgentStatus.ERROR, + message="Invalid source: A valid URL or local path is required for upload.", + data={}, + ) + try: if media_type == "video": content = VideoContent( @@ -161,6 +169,20 @@ def run( :return: AgentResponse - The response containing information about the upload operation. """ + if not source or not isinstance(source, str) or not source.strip(): + return AgentResponse( + status=AgentStatus.ERROR, + message="Invalid source: A valid URL or local path is required.", + data={}, + ) + + if source_type not in ["url", "local_file"]: + return AgentResponse( + status=AgentStatus.ERROR, + message=f"Invalid source type '{source_type}'. 
Must be 'url' or 'local_file'.", + data={}, + ) + self.videodb_tool = VideoDBTool(collection_id=collection_id) if source_type == "local_file": From ace2d7b35cf674c930e85f3476a5d96e0cf0541c Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Thu, 6 Mar 2025 15:03:08 +0530 Subject: [PATCH 06/12] fix: url with source in required Signed-off-by: royalpinto007 --- backend/director/agents/upload.py | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/backend/director/agents/upload.py b/backend/director/agents/upload.py index 3ea40769..6990c6d3 100644 --- a/backend/director/agents/upload.py +++ b/backend/director/agents/upload.py @@ -41,7 +41,7 @@ "description": "Collection ID to upload the content", }, }, - "required": ["media_type", "collection_id"], + "required": ["source", "media_type", "collection_id"], } @@ -59,14 +59,6 @@ def __init__(self, session: Session, **kwargs): def _upload(self, source: str, source_type: str, media_type: str, name: str = None): """Upload the media with the given URL.""" - - if not source or not isinstance(source, str) or not source.strip(): - return AgentResponse( - status=AgentStatus.ERROR, - message="Invalid source: A valid URL or local path is required for upload.", - data={}, - ) - try: if media_type == "video": content = VideoContent( @@ -169,20 +161,6 @@ def run( :return: AgentResponse - The response containing information about the upload operation. """ - if not source or not isinstance(source, str) or not source.strip(): - return AgentResponse( - status=AgentStatus.ERROR, - message="Invalid source: A valid URL or local path is required.", - data={}, - ) - - if source_type not in ["url", "local_file"]: - return AgentResponse( - status=AgentStatus.ERROR, - message=f"Invalid source type '{source_type}'. 
Must be 'url' or 'local_file'.",
-                data={},
-            )
-
         self.videodb_tool = VideoDBTool(collection_id=collection_id)
 
         if source_type == "local_file":

From 372abb91e2750d5525d34924afca9503ee7b642c Mon Sep 17 00:00:00 2001
From: royalpinto007
Date: Fri, 7 Mar 2025 16:08:50 +0530
Subject: [PATCH 07/12] fix: null type and unnecessary changes

Signed-off-by: royalpinto007
---
 backend/director/agents/base.py     | 17 ++++-------------
 backend/director/agents/download.py |  9 ++++++++-
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/backend/director/agents/base.py b/backend/director/agents/base.py
index a5f467cb..5fc9e3c7 100644
--- a/backend/director/agents/base.py
+++ b/backend/director/agents/base.py
@@ -45,21 +45,12 @@ def get_parameters(self):
 
         if "required" in parameters:
             parameters["required"] = [
-                param for param in parameters["required"] if param not in ["args", "kwargs"]
+                param
+                for param in parameters["required"]
+                if param not in ["args", "kwargs"]
             ]
 
-        required_params = set(parameters.get("required", []))
-        parameters["properties"] = {
-            key: value
-            for key, value in parameters["properties"].items()
-            if key in required_params
-        }
-
-        return {
-            "type": "object",
-            "properties": parameters["properties"],
-            "required": parameters.get("required", []),
-        }
+        return parameters
 
     def to_llm_format(self):
         """Convert the agent to LLM tool format."""
diff --git a/backend/director/agents/download.py b/backend/director/agents/download.py
index 24c531db..01f475d5 100644
--- a/backend/director/agents/download.py
+++ b/backend/director/agents/download.py
@@ -14,7 +14,14 @@ def __init__(self, session: Session, **kwargs):
         self.parameters = self.get_parameters()
         super().__init__(session=session, **kwargs)
 
-    def run(self, stream_link: str, name: str = None, *args, **kwargs) -> AgentResponse:
+    def run(
+        self,
+        stream_link: str,
+        name: str = None,
+        stream_name: str = None,
+        *args,
+        **kwargs,
+    ) -> AgentResponse:
         """
         Downloads the video from the given stream link.
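
With this revision, get_parameters() keeps the full inferred schema and only
strips the *args/**kwargs placeholders. A rough sketch of the intent, for a
hypothetical run(self, video_id: str, name: str = None, *args, **kwargs)
signature (names illustrative, not repo output):

    # inferred schema: properties has video_id, name, args, kwargs
    # after cleanup:   properties has video_id, name; required == ["video_id"]
    # The reverted filtering kept only required properties, which would have
    # silently dropped optional parameters such as "name".
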
From 0875c0c8d5d54e0f602ebbc559d29e9b4fb87c5c Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Fri, 7 Mar 2025 17:05:34 +0530 Subject: [PATCH 08/12] fix: unnecessary variables, env sample Signed-off-by: royalpinto007 --- backend/.env.sample | 1 + backend/director/llm/googleai.py | 62 +++++++++++++------------------- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/backend/.env.sample b/backend/.env.sample index e4251386..e48f9645 100644 --- a/backend/.env.sample +++ b/backend/.env.sample @@ -18,6 +18,7 @@ SQLITE_DB_PATH= # LLM Integrations OPENAI_API_KEY= ANTHROPIC_API_KEY= +GOOGLEAI_API_KEY= # Tools REPLICATE_API_TOKEN= diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index d4498fd1..56231c49 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -15,13 +15,13 @@ class GoogleChatModel(str, Enum): """Enum for Google Gemini Chat models""" - GEMINI_15_FLASH = "gemini-1.5-flash" - GEMINI_15_FLASH_002 = "gemini-1.5-flash-002" - GEMINI_15_PRO = "gemini-1.5-pro" - GEMINI_15_PRO_002 = "gemini-1.5-pro-002" - GEMINI_20_FLASH = "gemini-2.0-flash" - GEMINI_20_FLASH_001 = "gemini-2.0-flash-001" - GEMINI_20_PRO = "gemini-2.0-pro-exp" + GEMINI_1_5_FLASH = "gemini-1.5-flash" + GEMINI_1_5_FLASH_0_0_2 = "gemini-1.5-flash-002" + GEMINI_1_5_PRO = "gemini-1.5-pro" + GEMINI_1_5_PRO_0_0_2 = "gemini-1.5-pro-002" + GEMINI_2_0_FLASH = "gemini-2.0-flash" + GEMINI_2_0_FLASH_0_0_1 = "gemini-2.0-flash-001" + GEMINI_2_0_PRO = "gemini-2.0-pro-exp" class GoogleAIConfig(BaseLLMConfig): @@ -35,11 +35,8 @@ class GoogleAIConfig(BaseLLMConfig): llm_type: str = LLMType.GOOGLEAI api_key: str = "" api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/" - chat_model: str = Field(default=GoogleChatModel.GEMINI_20_FLASH) + chat_model: str = Field(default=GoogleChatModel.GEMINI_2_0_FLASH) max_tokens: int = 4096 - temperature: float = 0.7 - top_p: float = 1.0 - timeout: int = 30 @field_validator("api_key") @classmethod @@ -65,7 +62,7 @@ def __init__(self, config: GoogleAIConfig = None): raise ImportError("Please install OpenAI python library.") self.client = openai.OpenAI( - api_key=self.config.api_key, base_url=self.config.api_base + api_key=self.api_key, base_url=self.api_base ) def _format_messages(self, messages: list): @@ -134,16 +131,15 @@ def _format_tools(self, tools: list): ] def chat_completions( - self, messages: list, tools: list = [], stop=None, response_format=None + self, messages: list, tools: list = [], response_format=None ): - """Get completions for chat using Gemini 1.5 Flash.""" - + """Get chat completions using Gemini.""" params = { - "model": self.config.chat_model, + "model": self.chat_model, "messages": self._format_messages(messages), - "temperature": self.config.temperature or 0.7, - "max_tokens": self.config.max_tokens or 4096, - "top_p": self.config.top_p or 1.0, + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "top_p": self.top_p, } if tools: @@ -153,46 +149,38 @@ def chat_completions( if response_format: params["response_format"] = response_format - params = {k: v for k, v in params.items() if v is not None} - try: response = self.client.chat.completions.create(**params) except Exception as e: print(f"Error: {e}") return LLMResponse(content=f"Error: {e}") + choice = response.choices[0] if response.choices else None content = ( - response.choices[0].message.content - if response.choices and response.choices[0].message.content + choice.message.content + if choice and 
choice.message.content else "No response" ) tool_calls = ( [ { - "id": tool_call.id, + "id": tc.id, "tool": { - "name": tool_call.function.name, - "arguments": json.loads(tool_call.function.arguments), + "name": tc.function.name, + "arguments": json.loads(tc.function.arguments), }, - "type": tool_call.type, + "type": tc.type, } - for tool_call in response.choices[0].message.tool_calls + for tc in choice.message.tool_calls ] - if response.choices and response.choices[0].message.tool_calls + if choice and choice.message.tool_calls else [] ) - send_tokens = getattr(response.usage, "prompt_tokens", 0) - recv_tokens = getattr(response.usage, "completion_tokens", 0) - total_tokens = getattr(response.usage, "total_tokens", 0) - return LLMResponse( content=content, tool_calls=tool_calls, - finish_reason=response.choices[0].finish_reason if response.choices else "", - send_tokens=send_tokens, - recv_tokens=recv_tokens, - total_tokens=total_tokens, + finish_reason=choice.finish_reason if choice else "", status=LLMResponseStatus.SUCCESS, ) From ddf38ae14034b4be12ee047e4e5306e20c4e3bc4 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Fri, 7 Mar 2025 18:55:58 +0530 Subject: [PATCH 09/12] fix: openai structure, docs Signed-off-by: royalpinto007 --- backend/director/llm/googleai.py | 101 +++++++++++++++---------------- docs/llm/googleai.md | 15 +++++ 2 files changed, 63 insertions(+), 53 deletions(-) create mode 100644 docs/llm/googleai.md diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 56231c49..5a8d54d6 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -4,7 +4,7 @@ from pydantic import Field, field_validator, FieldValidationInfo from pydantic_settings import SettingsConfigDict -from director.core.session import RoleTypes + from director.llm.base import BaseLLM, BaseLLMConfig, LLMResponse, LLMResponseStatus from director.constants import ( LLMType, @@ -69,15 +69,8 @@ def _format_messages(self, messages: list): """Format the messages to the format that Google Gemini expects.""" formatted_messages = [] - if messages and messages[0]["role"] == RoleTypes.system.value: - messages = messages[1:] - for message in messages: - message["content"] = message.get("content", "") or "" - - if message["role"] == RoleTypes.assistant.value and message.get( - "tool_calls" - ): + if message["role"] == "assistant" and message.get("tool_calls"): formatted_messages.append( { "role": message["role"], @@ -86,10 +79,8 @@ def _format_messages(self, messages: list): { "id": tool_call["id"], "function": { - "name": tool_call.get("tool", {}).get("name", ""), - "arguments": json.dumps( - tool_call.get("tool", {}).get("arguments", {}) - ), + "name": tool_call["tool"]["name"], + "arguments": json.dumps(tool_call["tool"]["arguments"]), }, "type": tool_call["type"], } @@ -97,26 +88,40 @@ def _format_messages(self, messages: list): ], } ) - elif message["role"] == RoleTypes.tool.value: - formatted_messages.append( - { - "role": RoleTypes.tool.value, - "content": [ - { - "type": "tool_result", - "tool_use_id": message["tool_call_id"], - "content": message["content"], - } - ], - } - ) else: formatted_messages.append(message) return formatted_messages def _format_tools(self, tools: list): - """Format the tools to the format that Gemini expects.""" + """Format the tools to the format that Gemini expects. 
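+        Tools missing a "name" are skipped rather than forwarded.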
+ + **Example**:: + + [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. Chicago, IL" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ] + """ return [ { "type": "function", @@ -130,10 +135,11 @@ def _format_tools(self, tools: list): if tool.get("name") ] - def chat_completions( - self, messages: list, tools: list = [], response_format=None - ): - """Get chat completions using Gemini.""" + def chat_completions(self, messages: list, tools: list = [], response_format=None): + """Get chat completions using Gemini. + + docs: https://ai.google.dev/gemini-api/docs/openai + """ params = { "model": self.chat_model, "messages": self._format_messages(messages), @@ -155,32 +161,21 @@ def chat_completions( print(f"Error: {e}") return LLMResponse(content=f"Error: {e}") - choice = response.choices[0] if response.choices else None - content = ( - choice.message.content - if choice and choice.message.content - else "No response" - ) - - tool_calls = ( - [ + return LLMResponse( + content=response.choices[0].message.content or "", + tool_calls=[ { - "id": tc.id, + "id": tool_call.id, "tool": { - "name": tc.function.name, - "arguments": json.loads(tc.function.arguments), + "name": tool_call.function.name, + "arguments": json.loads(tool_call.function.arguments), }, - "type": tc.type, + "type": tool_call.type, } - for tc in choice.message.tool_calls + for tool_call in response.choices[0].message.tool_calls ] - if choice and choice.message.tool_calls - else [] - ) - - return LLMResponse( - content=content, - tool_calls=tool_calls, - finish_reason=choice.finish_reason if choice else "", + if response.choices[0].message.tool_calls + else [], + finish_reason=response.choices[0].finish_reason, status=LLMResponseStatus.SUCCESS, ) diff --git a/docs/llm/googleai.md b/docs/llm/googleai.md new file mode 100644 index 00000000..9cbe75eb --- /dev/null +++ b/docs/llm/googleai.md @@ -0,0 +1,15 @@ +## GoogleAI + +GoogleAI extends the Base LLM and implements the Google Gemini API. + +### GoogleAI Config + +GoogleAI Config is the configuration object for Google Gemini. It is used to configure Google Gemini and is passed to GoogleAI when it is created. + +::: director.llm.googleai.GoogleAIConfig + +### GoogleAI Interface + +GoogleAI is the LLM used by the agents and tools. It is used to generate responses to messages. + +::: director.llm.googleai.GoogleAI From d07aefa9c4558537160d7f0178ed882b14f12e48 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Fri, 7 Mar 2025 19:38:54 +0530 Subject: [PATCH 10/12] fix: basellm class, docs Signed-off-by: royalpinto007 --- backend/director/llm/googleai.py | 8 +++++++- mkdocs.yml | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 5a8d54d6..41c658bf 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -135,7 +135,9 @@ def _format_tools(self, tools: list): if tool.get("name") ] - def chat_completions(self, messages: list, tools: list = [], response_format=None): + def chat_completions( + self, messages: list, tools: list = [], stop=None, response_format=None + ): """Get chat completions using Gemini. 
docs: https://ai.google.dev/gemini-api/docs/openai @@ -146,6 +148,7 @@ def chat_completions(self, messages: list, tools: list = [], response_format=Non "temperature": self.temperature, "max_tokens": self.max_tokens, "top_p": self.top_p, + "timeout": self.timeout, } if tools: @@ -177,5 +180,8 @@ def chat_completions(self, messages: list, tools: list = [], response_format=Non if response.choices[0].message.tool_calls else [], finish_reason=response.choices[0].finish_reason, + send_tokens=response.usage.prompt_tokens, + recv_tokens=response.usage.completion_tokens, + total_tokens=response.usage.total_tokens, status=LLMResponseStatus.SUCCESS, ) diff --git a/mkdocs.yml b/mkdocs.yml index 196ad5e9..d7ef08b1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -74,6 +74,7 @@ nav: - Integrations: - 'OpenAI': 'llm/openai.md' - 'AnthropicAI': 'llm/anthropic.md' + - 'GoogleAI': 'llm/googleai.md' - 'Database': - 'Interface': 'database/interface.md' - Integrations: From bb3c63cc198253f4c5dc49fbd70b450cfce23b57 Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Mon, 10 Mar 2025 14:14:51 +0530 Subject: [PATCH 11/12] fix: reasoning error Signed-off-by: royalpinto007 --- backend/director/llm/googleai.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 41c658bf..0d519721 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -74,13 +74,17 @@ def _format_messages(self, messages: list): formatted_messages.append( { "role": message["role"], - "content": message["content"], + "content": message["content"] + if message["content"] + else "[Processing request...]", "tool_calls": [ { "id": tool_call["id"], "function": { "name": tool_call["tool"]["name"], - "arguments": json.dumps(tool_call["tool"]["arguments"]), + "arguments": json.dumps( + tool_call["tool"]["arguments"] + ), }, "type": tool_call["type"], } From b3df15837b64fbc8ffacdeef80816191185732ca Mon Sep 17 00:00:00 2001 From: royalpinto007 Date: Mon, 10 Mar 2025 21:10:33 +0530 Subject: [PATCH 12/12] fix: assistant content Signed-off-by: royalpinto007 --- backend/director/core/session.py | 2 ++ backend/director/llm/googleai.py | 9 ++------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/backend/director/core/session.py b/backend/director/core/session.py index 49f1d875..cbcdaf24 100644 --- a/backend/director/core/session.py +++ b/backend/director/core/session.py @@ -293,6 +293,8 @@ def to_llm_msg(self): if self.role == RoleTypes.assistant: if self.tool_calls: msg["tool_calls"] = self.tool_calls + if not self.content: + msg["content"] = [] return msg if self.role == RoleTypes.tool: diff --git a/backend/director/llm/googleai.py b/backend/director/llm/googleai.py index 0d519721..78e69a64 100644 --- a/backend/director/llm/googleai.py +++ b/backend/director/llm/googleai.py @@ -19,9 +19,6 @@ class GoogleChatModel(str, Enum): GEMINI_1_5_FLASH_0_0_2 = "gemini-1.5-flash-002" GEMINI_1_5_PRO = "gemini-1.5-pro" GEMINI_1_5_PRO_0_0_2 = "gemini-1.5-pro-002" - GEMINI_2_0_FLASH = "gemini-2.0-flash" - GEMINI_2_0_FLASH_0_0_1 = "gemini-2.0-flash-001" - GEMINI_2_0_PRO = "gemini-2.0-pro-exp" class GoogleAIConfig(BaseLLMConfig): @@ -35,7 +32,7 @@ class GoogleAIConfig(BaseLLMConfig): llm_type: str = LLMType.GOOGLEAI api_key: str = "" api_base: str = "https://generativelanguage.googleapis.com/v1beta/openai/" - chat_model: str = Field(default=GoogleChatModel.GEMINI_2_0_FLASH) + chat_model: str = Field(default=GoogleChatModel.GEMINI_1_5_FLASH) 
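+    # With env_prefix "GOOGLEAI_", pydantic-settings should also let a
+    # GOOGLEAI_CHAT_MODEL environment variable override this default.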
max_tokens: int = 4096 @field_validator("api_key") @@ -74,9 +71,7 @@ def _format_messages(self, messages: list): formatted_messages.append( { "role": message["role"], - "content": message["content"] - if message["content"] - else "[Processing request...]", + "content": message["content"], "tool_calls": [ { "id": tool_call["id"],
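
With the series applied, Gemini becomes a selectable backend. A minimal usage
sketch (not part of the patches; get_default_llm() checks the OpenAI and
Anthropic keys first, so this assumes only the Gemini key is exported):

    import os

    os.environ["GOOGLEAI_API_KEY"] = "<your-gemini-api-key>"  # placeholder value

    from director.llm import get_default_llm

    llm = get_default_llm()  # resolves to GoogleAI when only this key is set
    response = llm.chat_completions(
        messages=[{"role": "user", "content": "Reply with one word: ready?"}]
    )
    print(response.content, response.status)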