From 391f092e3a201c2687b652497b62e9b042fec6c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Thu, 10 Oct 2024 10:54:36 +0200 Subject: [PATCH] docs: improved getting started, tool docs, and docstrings --- docs/cli.rst | 16 +++++++++++ docs/conf.py | 2 +- docs/getting-started.rst | 47 +++++++++----------------------- docs/server.rst | 10 +++++-- docs/tools.rst | 45 +++++++++++++++++++++++++++---- gptme/tools/base.py | 19 ++++++++++--- gptme/tools/browser.py | 58 +++++++++++++--------------------------- gptme/tools/read.py | 5 ++-- gptme/tools/save.py | 26 +++++++++--------- gptme/tools/tmux.py | 2 ++ gptme/tools/vision.py | 6 +++++ 11 files changed, 136 insertions(+), 100 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index c0a1857ad..163149687 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -10,14 +10,30 @@ gptme provides the following commands: This is the full CLI reference. For a more concise version, run ``gptme --help``. +.. rubric:: gptme + +You can skip confirmation prompts and run in non-interactive mode to terminate when all prompts have been completed: + +.. code-block:: bash + + gptme --non-interactive --no-confirm 'create a snake game using curses in snake.py, dont run it' '-' 'make the snake green and the apple red' + +This should make it first write snake.py, then make the change in a following prompt. + +The '-' is a special "multiprompt" syntax that tells gptme to wait for the assistant to finish work on the current prompt (run until no more tool calls) before continuing with the next prompt. + .. click:: gptme.cli:main :prog: gptme :nested: full +.. rubric:: gptme-server + .. click:: gptme.server:main :prog: gptme-server :nested: full +.. rubric:: gptme-eval + ..
click:: gptme.eval:main :prog: gptme-eval :nested: full diff --git a/docs/conf.py b/docs/conf.py index 492ca49ff..4b318b8d4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,7 +4,6 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - import re from datetime import date @@ -116,6 +115,7 @@ def setup(app): ("py:class", "pathlib.Path"), ("py:class", "flask.app.Flask"), ("py:class", "gptme.tools.python.T"), + ("py:class", "threading.Thread"), ] # -- Options for HTML output ------------------------------------------------- diff --git a/docs/getting-started.rst b/docs/getting-started.rst index 649d05b23..c23e1dc87 100644 --- a/docs/getting-started.rst +++ b/docs/getting-started.rst @@ -29,8 +29,7 @@ To initiate a new chat or select an existing one, execute: This will show you a list of past chats, allowing you to select one or start a new one. -Writing a file -************** +.. rubric:: Writing a file You can then interact with the assistant. Lets start by asking it to write code. @@ -50,8 +49,7 @@ You can then interact with the assistant. Lets start by asking it to write code. The assistant will prompt for your confirmation and save the file, as requested. -Making changes -************** +.. rubric:: Making changes We can also start chats and request changes directly from the command line. The contents of any mentioned text files will be included as context, and the assistant will generate patches to apply the requested changes: @@ -74,42 +72,23 @@ We can also start chats and request changes directly from the command line. The System: Patch applied .. note:: - With the browser extras installed, the assistant can also process URLs included in the prompt. + With the :ref:`tools:browser` extras installed, the assistant can also process URLs included in the prompt. 
-Other tools -*********** +More tools +********** -You can read about other tools on the :doc:`tools` page. +You can read about all the other tools on the :doc:`tools` page. -Other interfaces -**************** +Including :ref:`tools:shell`, :ref:`tools:python`, how to set up :ref:`tools:browser`, and use :ref:`tools:vision`. -There are other ways to interact with the assistant: +Interfaces +********** -Command line -^^^^^^^^^^^^ +There are several ways to interact with gptme: -Commands can also be executed directly from the command line. For example, one can skip confirmation prompts and run in non-interactive mode to terminate when all prompts have been completed: - -.. code-block:: bash - - gptme --non-interactive --no-confirm 'create a snake game using curses in snake.py, dont run it' '-' 'make the snake green and the apple red' - -This should make it first write snake.py, then make the change in a following prompt. The '-' is special "multiprompt" syntax that tells the assistant to wait for the next prompt before continuing. - -Web UI -^^^^^^ - -To run the assistant in a web interface, execute: - -.. code-block:: bash - - gptme-server - -This should let you view your chats in a web browser and make basic requests. - -.. note:: - The web interface is still in development and is not fully functional (no confirmation prompts or streaming). +- :doc:`CLI <cli>` +- :ref:`server:web ui` +- :doc:`bot` Support ------- diff --git a/docs/server.rst b/docs/server.rst index 3cac23413..8e33665a7 100644 --- a/docs/server.rst +++ b/docs/server.rst @@ -3,7 +3,7 @@ Server .. note:: The server and web UI is still in development and does not have all the features of the CLI. - It does not support streaming, doesn't ask for confirmation before executing, lacks the ability to interrupt generations, etc.
gptme has a minimal REST API with very minimalistic web UI. @@ -16,6 +16,12 @@ It can be started by running the following command: Web UI ------ +.. code-block:: bash + + gptme-server + +This should let you view your chats in a web browser and make basic requests. + You can then access the web UI by visiting http://localhost:5000 in your browser. -For more usage, see `the CLI documentation `_. +For more usage, see :ref:`the CLI documentation `. diff --git a/docs/tools.rst b/docs/tools.rst index 8bdf515e2..36f55a9b5 100644 --- a/docs/tools.rst +++ b/docs/tools.rst @@ -3,24 +3,31 @@ Tools Tools available in gptme. -The main tools can be grouped in the following categories: +The tools can be grouped into the following categories: -- execution +- Execution - `Shell`_ - `Python`_ - `Tmux`_ + - `Subagent`_ -- filesystem +- Files + - `Read`_ - `Save`_ - `Patch`_ -- network +- Network - `Browser`_ -- chat management +- Vision + + - `Screenshot`_ + - `Vision`_ + +- Chat management - `Chats`_ @@ -45,6 +52,20 @@ Tmux :members: :noindex: +Subagent +-------- + +.. automodule:: gptme.tools.subagent + :members: + :noindex: + +Read +---- + +.. automodule:: gptme.tools.read + :members: + :noindex: + Save ---- @@ -59,6 +80,13 @@ Patch :members: :noindex: +Screenshot +---------- + +.. automodule:: gptme.tools.screenshot + :members: + :noindex: + Browser ------- @@ -66,6 +94,13 @@ Browser :members: :noindex: +Vision +------ + +.. 
automodule:: gptme.tools.vision + :members: + :noindex: + Chats ----- diff --git a/gptme/tools/base.py b/gptme/tools/base.py index bc7c11c4f..1aaa80e63 100644 --- a/gptme/tools/base.py +++ b/gptme/tools/base.py @@ -1,6 +1,7 @@ import logging from collections.abc import Callable, Generator from dataclasses import dataclass, field +from textwrap import indent from typing import Literal, Protocol, TypeAlias from lxml import etree @@ -58,11 +59,21 @@ def get_doc(self, doc: str | None = None) -> str: doc = "" else: doc += "\n\n" + if self.instructions: + doc += f""" +.. rubric:: Instructions + +.. code-block:: markdown + +{indent(self.instructions, " ")}\n\n""" if self.examples: - doc += ( - f"# Examples\n\n{transform_examples_to_chat_directives(self.examples)}" - ) - return doc + doc += f""" +.. rubric:: Examples + +{transform_examples_to_chat_directives(self.examples)}\n\n +""" + # doc += """.. rubric:: Members""" + return doc.strip() def __eq__(self, other): if not isinstance(other, ToolSpec): diff --git a/gptme/tools/browser.py b/gptme/tools/browser.py index ae69ae2f9..0ee3841a9 100644 --- a/gptme/tools/browser.py +++ b/gptme/tools/browser.py @@ -8,6 +8,13 @@ .. note:: This is an experimental feature. It needs some work to be more robust and useful. + +To use the browser tool, you need to have the `playwright` Python package installed along with gptme, which you can install with: + +.. code-block:: bash + + pipx install gptme[browser] + gptme '/shell playwright install chromium' """ import importlib.util @@ -19,7 +26,7 @@ import tempfile from typing import Literal -from .base import ToolSpec +from .base import ToolSpec, ToolUse has_playwright = importlib.util.find_spec("playwright") is not None @@ -40,64 +47,37 @@ To browse the web, you can use the `read_url`, `search`, and `screenshot_url` functions in Python. 
""".strip() -examples = """ +examples = f""" ### Answer question from URL with browsing User: find out which is the latest ActivityWatch version from superuserlabs.org Assistant: Let's browse the site. -```ipython -read_url("https://superuserlabs.org/") -``` +{ToolUse("ipython", [], "read_url('https://superuserlabs.org/')").to_output()} System: -```https://superuserlabs.org/ -... -[ActivityWatch](https://activitywatch.net/) -... -``` +{ToolUse("https://superuserlabs.org/", [], "... [ActivityWatch](https://activitywatch.net/) ...".strip()).to_output()} Assistant: Couldn't find the answer on the page. Following link to the ActivityWatch website. -```ipython -read_url("https://activitywatch.net/") -``` +{ToolUse("ipython", [], "read_url('https://activitywatch.net/')").to_output()} System: -```https://activitywatch.net/ -... -Download latest version v0.12.2 -... -``` +{ToolUse("https://activitywatch.net/", [], "... Download latest version v0.12.2 ...".strip()).to_output()} Assistant: The latest version of ActivityWatch is v0.12.2 ### Searching User: who is the founder of ActivityWatch? Assistant: Let's search for that. -```ipython -search("ActivityWatch founder") -``` +{ToolUse("ipython", [], "search('ActivityWatch founder')").to_output()} System: -```results -1. [ActivityWatch](https://activitywatch.net/) -... -``` +{ToolUse("results", [], "1. [ActivityWatch](https://activitywatch.net/) ...").to_output()} Assistant: Following link to the ActivityWatch website. -```ipython -read_url("https://activitywatch.net/") -``` +{ToolUse("ipython", [], "read_url('https://activitywatch.net/')").to_output()} System: -```https://activitywatch.net/ -... -The ActivityWatch project was founded by Erik Bjäreholt in 2016. -... -``` +{ToolUse("https://activitywatch.net/", [], "... The ActivityWatch project was founded by Erik Bjäreholt in 2016. ...".strip()).to_output()} Assistant: The founder of ActivityWatch is Erik Bjäreholt. 
### Take screenshot of page User: take a screenshot of the ActivityWatch website Assistant: Certainly! I'll use the browser tool to screenshot the ActivityWatch website. -```ipython -screenshot_url("https://activitywatch.net") -``` +{ToolUse("ipython", [], "screenshot_url('https://activitywatch.net')").to_output()} System: -``` -Screenshot saved to screenshot.png -``` +{ToolUse("result", [], "Screenshot saved to screenshot.png").to_output()} """.strip() diff --git a/gptme/tools/read.py b/gptme/tools/read.py index 9a939ecc7..19ee2fa82 100644 --- a/gptme/tools/read.py +++ b/gptme/tools/read.py @@ -6,8 +6,9 @@ instructions = "Read files using `cat`" examples = f""" -> Assistant: -{ToolUse("bash", [], "cat file.txt").to_output()} +User: read file.txt +Assistant: +{ToolUse("shell", [], "cat file.txt").to_output()} """ # Note: this isn't actually a tool, it only serves prompting purposes diff --git a/gptme/tools/save.py b/gptme/tools/save.py index 2f6dc14e9..192a48964 100644 --- a/gptme/tools/save.py +++ b/gptme/tools/save.py @@ -1,5 +1,5 @@ """ -Gives the assistant the ability to save/write code to a file. +Gives the assistant the ability to save whole files, or append to them. """ from collections.abc import Generator @@ -12,7 +12,11 @@ # FIXME: this is markdown-specific instructions, thus will confuse the XML mode instructions = """ -To write text to a file, use a code block with the language tag set to the path of the file. 
+To write to a file, use a code block with the language tag: `save <path>` +""".strip() + +instructions_append = """ +To append to a file, use a code block with the language tag: `append <path>` """.strip() examples = f""" @@ -25,6 +29,13 @@ > System: Saved to `hello.py` """.strip() +examples_append = f""" +> User: append a print "Hello world" to hello.py +> Assistant: +{ToolUse("append", ["hello.py"], 'print("Hello world")').to_output()} +> System: Appended to `hello.py` +""".strip() + def execute_save( code: str, ask: bool, args: list[str] @@ -141,17 +152,6 @@ def execute_append( ) __doc__ = tool_save.get_doc(__doc__) -instructions_append = """ -To append text to a file, use a code block with the language: append -""".strip() - -examples_append = f""" -> User: append a print "Hello world" to hello.py -> Assistant: -{ToolUse("append", ["hello.py"], 'print("Hello world")').to_output()} -> System: Appended to `hello.py` -""".strip() - tool_append = ToolSpec( name="append", desc="Append text to file", diff --git a/gptme/tools/tmux.py b/gptme/tools/tmux.py index feca84780..8f91bbc2d 100644 --- a/gptme/tools/tmux.py +++ b/gptme/tools/tmux.py @@ -223,6 +223,8 @@ def execute_tmux( #### Get info from ncurses applications User: start top and give me a summary +Assistant: Sure! Let's start the top command in a tmux session: +{ToolUse("tmux", [], "new_session 'top'").to_output()} System: Running `top` in session 1. ```output (output from top shown here) diff --git a/gptme/tools/vision.py b/gptme/tools/vision.py index 078b7e714..585828f62 100644 --- a/gptme/tools/vision.py +++ b/gptme/tools/vision.py @@ -1,3 +1,9 @@ +""" +Tools for viewing images, giving the assistant vision. + +Requires a model which supports vision, such as GPT-4o, Anthropic's Claude models, and Llama 3.2. +""" + from collections.abc import Generator from pathlib import Path