diff --git a/gptme/cli.py b/gptme/cli.py index d8768ad3..2b542ef5 100644 --- a/gptme/cli.py +++ b/gptme/cli.py @@ -177,7 +177,8 @@ def main( tool_allowlist = [tool for tools in tool_allowlist for tool in tools.split(",")] config = get_config() - tool_format = tool_format or config.get_env("TOOL_TYPE") or "markdown" + + tool_format = tool_format or config.get_env("TOOL_FORMAT") or "markdown" set_tool_format(tool_format) diff --git a/gptme/llm/llm_openai.py b/gptme/llm/llm_openai.py index e12d959f..9452b6ca 100644 --- a/gptme/llm/llm_openai.py +++ b/gptme/llm/llm_openai.py @@ -219,6 +219,8 @@ def _process_file(msg: dict) -> dict: else [{"type": "text", "text": message_content}] ) + has_images = False + for f in msg.get("files", []): f = Path(f) ext = f.suffix[1:] @@ -263,8 +265,14 @@ def _process_file(msg: dict) -> dict: "image_url": {"url": f"data:{media_type};base64,{data}"}, } ) + has_images = True msg["content"] = content + + if msg["role"] == "system" and has_images: + # Images must come from user with openai + msg["role"] = "user" + return msg diff --git a/gptme/tools/base.py b/gptme/tools/base.py index 89d822bf..26614126 100644 --- a/gptme/tools/base.py +++ b/gptme/tools/base.py @@ -2,10 +2,11 @@ import json import logging import re +import types from collections.abc import Callable, Generator from dataclasses import dataclass, field from textwrap import indent -from typing import Any, Literal, Protocol, TypeAlias, cast +from typing import Any, Literal, Protocol, TypeAlias, cast, get_origin from lxml import etree @@ -71,6 +72,30 @@ class Parameter: required: bool = False +# TODO: there must be a better way? 
+def derive_type(t) -> str: + if get_origin(t) == Literal: + v = ", ".join(f'"{a}"' for a in t.__args__) + return f"Literal[{v}]" + elif get_origin(t) == types.UnionType: + v = ", ".join(derive_type(a) for a in t.__args__) + return f"Union[{v}]" + else: + return t.__name__ + + +def callable_signature(func: Callable) -> str: + # returns a signature f(arg1: type1, arg2: type2, ...) -> return_type + args = ", ".join( + f"{k}: {derive_type(v)}" + for k, v in func.__annotations__.items() + if k != "return" + ) + ret_type = func.__annotations__.get("return") + ret = f" -> {derive_type(ret_type)}" if ret_type else "" + return f"{func.__name__}({args}){ret}" + + @dataclass(frozen=True, eq=False) class ToolSpec: """ @@ -139,6 +164,9 @@ def get_instructions(self, tool_format: ToolFormat): if tool_format in self.instructions_format: instructions.append(self.instructions_format[tool_format]) + if self.functions: + instructions.append(self.get_functions_description()) + return "\n\n".join(instructions) def get_tool_prompt(self, examples: bool, tool_format: ToolFormat): @@ -157,6 +185,19 @@ def get_examples(self, tool_format: ToolFormat = "markdown"): return self.examples(tool_format) return self.examples + def get_functions_description(self) -> str: + # return a prompt with a brief description of the available functions + if self.functions: + description = ( + "The following Python functions can be called with `ipython` tool:\n\n" + ) + return description + "\n".join( + f"{callable_signature(func)}: {func.__doc__ or 'No description'}" + for func in self.functions + ) + else: + return "None" + @dataclass(frozen=True) class ToolUse: diff --git a/gptme/tools/browser.py b/gptme/tools/browser.py index ce8ed393..6a9eb19b 100644 --- a/gptme/tools/browser.py +++ b/gptme/tools/browser.py @@ -46,10 +46,6 @@ EngineType = Literal["google", "duckduckgo"] -instructions = """ -To browse the web, you can use the `read_url`, `search`, and `screenshot_url` functions in Python with the `ipython` 
tool. -""".strip() - def examples(tool_format): return f""" @@ -128,8 +124,7 @@ def screenshot_url(url: str, path: Path | str | None = None) -> Path: tool = ToolSpec( name="browser", - desc="Browse the web", - instructions=instructions, + desc="Browse, search or screenshot the web", examples=examples, functions=[read_url, search, screenshot_url], available=has_browser_tool(), diff --git a/gptme/tools/chats.py b/gptme/tools/chats.py index da3195b3..623a2dff 100644 --- a/gptme/tools/chats.py +++ b/gptme/tools/chats.py @@ -140,15 +140,6 @@ def read_chat(conversation: str, max_results: int = 5, incl_system=False) -> Non print(f"Conversation '{conversation}' not found.") -instructions = """ -you can list, search and summarize past conversation logs by using the following Python functions -with the `ipython` tool: -- list_chats -- search_chats -- read_chat -""".strip() - - def examples(tool_format): return f""" ### Search for a specific topic in past conversations @@ -161,7 +152,6 @@ def examples(tool_format): tool = ToolSpec( name="chats", desc="List, search, and summarize past conversation logs", - instructions=instructions, examples=examples, functions=[list_chats, search_chats, read_chat], ) diff --git a/gptme/tools/computer.py b/gptme/tools/computer.py index 3cbf1157..8a628e02 100644 --- a/gptme/tools/computer.py +++ b/gptme/tools/computer.py @@ -246,7 +246,7 @@ def computer( instructions = """ -Use the following Python function with the `ipython` tool to interact with the computer through X11. +You can interact with the computer through X11 with the `computer` Python function. 
def get_functions():
    """Return one ``- name`` bullet per registered function, newline-separated."""
    bullets = []
    for fn in registered_functions.values():
        bullets.append(f"- {fn.__name__}")
    return "\n".join(bullets)
@@ -218,9 +187,9 @@ def init() -> ToolSpec: The following libraries are available: {python_libraries_str} -The following functions are available in the REPL: -{get_functions_prompt()} - """.strip() +The following functions are available: +{get_functions()} +""".strip() # create a copy with the updated instructions return dataclasses.replace(tool, instructions=_instructions) diff --git a/gptme/tools/rag.py b/gptme/tools/rag.py index ca112fec..f7b776f6 100644 --- a/gptme/tools/rag.py +++ b/gptme/tools/rag.py @@ -56,10 +56,7 @@ logger = logging.getLogger(__name__) instructions = """ -Use RAG to index and search project documentation. Use `ipython` tool with the following Python functions: -- rag_index -- rag_search -- rag_status +Use RAG to index and search project documentation. """ diff --git a/gptme/tools/screenshot.py b/gptme/tools/screenshot.py index 07462839..84c29824 100644 --- a/gptme/tools/screenshot.py +++ b/gptme/tools/screenshot.py @@ -42,6 +42,5 @@ def screenshot(path: Path | None = None) -> Path: tool = ToolSpec( name="screenshot", desc="Take a screenshot", - instructions="Use the `screenshot` Python function in `ipython` tool to capture a screenshot. You can optionally specify a filename.", functions=[screenshot], ) diff --git a/gptme/tools/subagent.py b/gptme/tools/subagent.py index bf4aaae9..e3a0b6a7 100644 --- a/gptme/tools/subagent.py +++ b/gptme/tools/subagent.py @@ -156,10 +156,7 @@ def examples(tool_format): instructions = """ -You can use the following Python function with `ipython` tool to manage subagents: -- subagent(sub_agent_name, prompt) -- subagent_status(sub_agent_name) -- subagent_wait(sub_agent_name) +You can create, check status and wait for subagents. 
""".strip() tool = ToolSpec( diff --git a/gptme/tools/vision.py b/gptme/tools/vision.py index 8bd74ca5..dc73fdaa 100644 --- a/gptme/tools/vision.py +++ b/gptme/tools/vision.py @@ -20,7 +20,7 @@ def view_image(image_path: Path | str) -> Message: instructions = """ -You can use the `view_image(path)` Python function with `ipython` tool to open an image file. +Use the `view_image` Python function with `ipython` tool to view an image file. """.strip() tool = ToolSpec( diff --git a/gptme/tools/youtube.py b/gptme/tools/youtube.py index 0b7077dc..3412bba9 100644 --- a/gptme/tools/youtube.py +++ b/gptme/tools/youtube.py @@ -28,17 +28,9 @@ def summarize_transcript(transcript: str) -> str: return llm_summarize(transcript).content -instructions = """ -You can use the following Python functions with `ipython` tool: -- `get_transcript(youtube_video_id)` -- `summarize_transcript(youtube_video_id)` -""".strip() - - tool: ToolSpec = ToolSpec( name="youtube", desc="Fetch and summarize YouTube video transcripts", - instructions=instructions, functions=[get_transcript, summarize_transcript], block_types=["youtube"], available=bool(YouTubeTranscriptApi), diff --git a/tests/test_tools_python.py b/tests/test_tools_python.py index 86fa98d5..ca6ade7b 100644 --- a/tests/test_tools_python.py +++ b/tests/test_tools_python.py @@ -1,6 +1,7 @@ from typing import Literal, TypeAlias -from gptme.tools.python import callable_signature, execute_python +from gptme.tools.python import execute_python +from gptme.tools.base import callable_signature def run(code):