From 8878d9bfce9762ad0731b177180b78c5c19476ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= <erik@bjareho.lt>
Date: Thu, 10 Oct 2024 10:54:36 +0200
Subject: [PATCH] docs: improved getting started, tool docs, and docstrings

---
 docs/cli.rst             | 16 +++++++++++
 docs/conf.py             |  2 +-
 docs/getting-started.rst | 47 +++++++++-----------------------
 docs/server.rst          | 10 +++++--
 docs/tools.rst           | 45 +++++++++++++++++++++++++++----
 gptme/tools/base.py      | 19 ++++++++++---
 gptme/tools/browser.py   | 58 +++++++++++++---------------------------
 gptme/tools/chats.py     |  8 +++---
 gptme/tools/gh.py        | 49 +++++++++++++++++----------------
 gptme/tools/patch.py     | 18 +++++--------
 gptme/tools/python.py    | 12 +++------
 gptme/tools/read.py      |  5 ++--
 gptme/tools/save.py      | 26 +++++++++---------
 gptme/tools/shell.py     |  9 +++----
 gptme/tools/tmux.py      | 14 ++++------
 gptme/tools/vision.py    |  6 +++++
 16 files changed, 181 insertions(+), 163 deletions(-)

diff --git a/docs/cli.rst b/docs/cli.rst
index c0a1857a..16314968 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -10,14 +10,30 @@ gptme provides the following commands:
 
 This is the full CLI reference. For a more concise version, run ``gptme --help``.
 
+.. rubric:: gptme
+
+You can skip confirmation prompts and run in non-interactive mode to terminate when all prompts have been completed:
+
+.. code-block:: bash
+
+    gptme --non-interactive --no-confirm 'create a snake game using curses in snake.py, dont run it' '-' 'make the snake green and the apple red'
+
+This should make it first write snake.py, then make the change in a following prompt.
+
+The '-' is special "multiprompt" syntax that tells gptme to wait for the assistant to finish work on the previous prompt (run until no more tool calls) before continuing with the next prompt.
+
 .. click:: gptme.cli:main
    :prog: gptme
    :nested: full
 
+.. rubric:: gptme-server
+
 .. click:: gptme.server:main
    :prog: gptme-server
    :nested: full
 
+.. rubric:: gptme-eval
+
 .. click:: gptme.eval:main
    :prog: gptme-eval
    :nested: full
diff --git a/docs/conf.py b/docs/conf.py
index 492ca49f..4b318b8d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -4,7 +4,6 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
-
 import re
 from datetime import date
 
@@ -116,6 +115,7 @@ def setup(app):
     ("py:class", "pathlib.Path"),
     ("py:class", "flask.app.Flask"),
     ("py:class", "gptme.tools.python.T"),
+    ("py:class", "threading.Thread"),
 ]
 
 # -- Options for HTML output -------------------------------------------------
diff --git a/docs/getting-started.rst b/docs/getting-started.rst
index 649d05b2..c23e1dc8 100644
--- a/docs/getting-started.rst
+++ b/docs/getting-started.rst
@@ -29,8 +29,7 @@ To initiate a new chat or select an existing one, execute:
 
 This will show you a list of past chats, allowing you to select one or start a new one.
 
-Writing a file
-**************
+.. rubric:: Writing a file
 
 You can then interact with the assistant. Lets start by asking it to write code.
 
@@ -50,8 +49,7 @@ You can then interact with the assistant. Lets start by asking it to write code.
 
 The assistant will prompt for your confirmation and save the file, as requested.
 
-Making changes
-**************
+.. rubric:: Making changes
 
 We can also start chats and request changes directly from the command line. The contents of any mentioned text files will be included as context, and the assistant will generate patches to apply the requested changes:
 
@@ -74,42 +72,23 @@ We can also start chats and request changes directly from the command line. The
    System: Patch applied
 
 .. note::
-    With the browser extras installed, the assistant can also process URLs included in the prompt.
+    With the :ref:`tools:browser` extras installed, the assistant can also process URLs included in the prompt.
 
-Other tools
-***********
+More tools
+**********
 
-You can read about other tools on the :doc:`tools` page.
+You can read about all the other tools on the :doc:`tools` page.
 
-Other interfaces
-****************
+These include :ref:`tools:shell` and :ref:`tools:python`, as well as how to set up :ref:`tools:browser` and use :ref:`tools:vision`.
 
-There are other ways to interact with the assistant:
+Interfaces
+**********
 
-Command line
-^^^^^^^^^^^^
+There are several ways to interact with gptme:
 
-Commands can also be executed directly from the command line. For example, one can skip confirmation prompts and  run in non-interactive mode to terminate when all prompts have been completed:
-
-.. code-block:: bash
-
-    gptme --non-interactive --no-confirm 'create a snake game using curses in snake.py, dont run it' '-' 'make the snake green and the apple red'
-
-This should make it first write snake.py, then make the change in a following prompt. The '-' is special "multiprompt" syntax that tells the assistant to wait for the next prompt before continuing.
-
-Web UI
-^^^^^^
-
-To run the assistant in a web interface, execute:
-
-.. code-block:: bash
-
-    gptme-server
-
-This should let you view your chats in a web browser and make basic requests.
-
-.. note::
-    The web interface is still in development and is not fully functional (no confirmation prompts or streaming).
+- :doc:`CLI <cli>`
+- :ref:`server:web ui`
+- :doc:`bot`
 
 Support
 -------
diff --git a/docs/server.rst b/docs/server.rst
index 3cac2341..8e33665a 100644
--- a/docs/server.rst
+++ b/docs/server.rst
@@ -3,7 +3,7 @@ Server
 
 .. note::
    The server and web UI is still in development and does not have all the features of the CLI.
-   It does not support streaming, doesn't ask for confirmation before executing, lacks the ability to interrupt generations, etc.
+   It does not support streaming, doesn't ask for confirmation before executing, lacks the ability to interrupt responses and tool calls, etc.
 
 gptme has a minimal REST API with very minimalistic web UI.
 
@@ -16,6 +16,12 @@ It can be started by running the following command:
 Web UI
 ------
 
+.. code-block:: bash
+
+    gptme-server
+
+This should let you view your chats in a web browser and make basic requests.
+
 You can then access the web UI by visiting http://localhost:5000 in your browser.
 
-For more usage, see `the CLI documentation <cli.html#gptme-server>`_.
+For more usage, see :ref:`the CLI documentation <cli:gptme-server>`.
diff --git a/docs/tools.rst b/docs/tools.rst
index 8bdf515e..36f55a9b 100644
--- a/docs/tools.rst
+++ b/docs/tools.rst
@@ -3,24 +3,31 @@ Tools
 
 Tools available in gptme.
 
-The main tools can be grouped in the following categories:
+The tools can be grouped into the following categories:
 
-- execution
+- Execution
 
   - `Shell`_
   - `Python`_
   - `Tmux`_
+  - `Subagent`_
 
-- filesystem
+- Files
 
+  - `Read`_
   - `Save`_
   - `Patch`_
 
-- network
+- Network
 
   - `Browser`_
 
-- chat management
+- Vision
+
+  - `Screenshot`_
+  - `Vision`_
+
+- Chat management
 
   - `Chats`_
 
@@ -45,6 +52,20 @@ Tmux
     :members:
     :noindex:
 
+Subagent
+--------
+
+.. automodule:: gptme.tools.subagent
+    :members:
+    :noindex:
+
+Read
+----
+
+.. automodule:: gptme.tools.read
+    :members:
+    :noindex:
+
 Save
 ----
 
@@ -59,6 +80,13 @@ Patch
     :members:
     :noindex:
 
+Screenshot
+----------
+
+.. automodule:: gptme.tools.screenshot
+    :members:
+    :noindex:
+
 Browser
 -------
 
@@ -66,6 +94,13 @@ Browser
     :members:
     :noindex:
 
+Vision
+------
+
+.. automodule:: gptme.tools.vision
+    :members:
+    :noindex:
+
 Chats
 -----
 
diff --git a/gptme/tools/base.py b/gptme/tools/base.py
index bc7c11c4..1aaa80e6 100644
--- a/gptme/tools/base.py
+++ b/gptme/tools/base.py
@@ -1,6 +1,7 @@
 import logging
 from collections.abc import Callable, Generator
 from dataclasses import dataclass, field
+from textwrap import indent
 from typing import Literal, Protocol, TypeAlias
 
 from lxml import etree
@@ -58,11 +59,21 @@ def get_doc(self, doc: str | None = None) -> str:
             doc = ""
         else:
             doc += "\n\n"
+        if self.instructions:
+            doc += f"""
+.. rubric:: Instructions
+
+.. code-block:: markdown
+
+{indent(self.instructions, "    ")}\n\n"""
         if self.examples:
-            doc += (
-                f"# Examples\n\n{transform_examples_to_chat_directives(self.examples)}"
-            )
-        return doc
+            doc += f"""
+.. rubric:: Examples
+
+{transform_examples_to_chat_directives(self.examples)}\n\n
+"""
+        # doc += """.. rubric:: Members"""
+        return doc.strip()
 
     def __eq__(self, other):
         if not isinstance(other, ToolSpec):
diff --git a/gptme/tools/browser.py b/gptme/tools/browser.py
index ae69ae2f..0ee3841a 100644
--- a/gptme/tools/browser.py
+++ b/gptme/tools/browser.py
@@ -8,6 +8,13 @@
 .. note::
 
     This is an experimental feature. It needs some work to be more robust and useful.
+
+To use the browser tool, you need to have the ``playwright`` Python package installed along with gptme, which you can install with:
+
+.. code-block:: bash
+
+    pipx install 'gptme[browser]'
+    gptme '/shell playwright install chromium'
 """
 
 import importlib.util
@@ -19,7 +26,7 @@
 import tempfile
 from typing import Literal
 
-from .base import ToolSpec
+from .base import ToolSpec, ToolUse
 
 has_playwright = importlib.util.find_spec("playwright") is not None
 
@@ -40,64 +47,37 @@
 To browse the web, you can use the `read_url`, `search`, and `screenshot_url` functions in Python.
 """.strip()
 
-examples = """
+examples = f"""
 ### Answer question from URL with browsing
 User: find out which is the latest ActivityWatch version from superuserlabs.org
 Assistant: Let's browse the site.
-```ipython
-read_url("https://superuserlabs.org/")
-```
+{ToolUse("ipython", [], "read_url('https://superuserlabs.org/')").to_output()}
 System:
-```https://superuserlabs.org/
-...
-[ActivityWatch](https://activitywatch.net/)
-...
-```
+{ToolUse("https://superuserlabs.org/", [], "... [ActivityWatch](https://activitywatch.net/) ...".strip()).to_output()}
 Assistant: Couldn't find the answer on the page. Following link to the ActivityWatch website.
-```ipython
-read_url("https://activitywatch.net/")
-```
+{ToolUse("ipython", [], "read_url('https://activitywatch.net/')").to_output()}
 System:
-```https://activitywatch.net/
-...
-Download latest version v0.12.2
-...
-```
+{ToolUse("https://activitywatch.net/", [], "... Download latest version v0.12.2 ...".strip()).to_output()}
 Assistant: The latest version of ActivityWatch is v0.12.2
 
 ### Searching
 User: who is the founder of ActivityWatch?
 Assistant: Let's search for that.
-```ipython
-search("ActivityWatch founder")
-```
+{ToolUse("ipython", [], "search('ActivityWatch founder')").to_output()}
 System:
-```results
-1. [ActivityWatch](https://activitywatch.net/)
-...
-```
+{ToolUse("results", [], "1. [ActivityWatch](https://activitywatch.net/) ...").to_output()}
 Assistant: Following link to the ActivityWatch website.
-```ipython
-read_url("https://activitywatch.net/")
-```
+{ToolUse("ipython", [], "read_url('https://activitywatch.net/')").to_output()}
 System:
-```https://activitywatch.net/
-...
-The ActivityWatch project was founded by Erik Bjäreholt in 2016.
-...
-```
+{ToolUse("https://activitywatch.net/", [], "... The ActivityWatch project was founded by Erik Bjäreholt in 2016. ...".strip()).to_output()}
 Assistant: The founder of ActivityWatch is Erik Bjäreholt.
 
 ### Take screenshot of page
 User: take a screenshot of the ActivityWatch website
 Assistant: Certainly! I'll use the browser tool to screenshot the ActivityWatch website.
-```ipython
-screenshot_url("https://activitywatch.net")
-```
+{ToolUse("ipython", [], "screenshot_url('https://activitywatch.net')").to_output()}
 System:
-```
-Screenshot saved to screenshot.png
-```
+{ToolUse("result", [], "Screenshot saved to screenshot.png").to_output()}
 """.strip()
 
 
diff --git a/gptme/tools/chats.py b/gptme/tools/chats.py
index 7a003036..991b3f02 100644
--- a/gptme/tools/chats.py
+++ b/gptme/tools/chats.py
@@ -9,7 +9,7 @@
 from typing import TYPE_CHECKING
 
 from ..message import Message
-from .base import ToolSpec
+from .base import ToolSpec, ToolUse
 
 if TYPE_CHECKING:
     from ..logmanager import LogManager
@@ -193,13 +193,11 @@ def read_chat(conversation: str, max_results: int = 5, incl_system=False) -> Non
 The chats tool allows you to list, search, and summarize past conversation logs.
 """
 
-examples = """
+examples = f"""
 ### Search for a specific topic in past conversations
 User: Can you find any mentions of "python" in our past conversations?
 Assistant: Certainly! I'll search our past conversations for mentions of "python" using the search_chats function.
-```ipython
-search_chats("python")
-```
+{ToolUse("ipython", [], "search_chats('python')").to_output()}
 """
 
 tool = ToolSpec(
diff --git a/gptme/tools/gh.py b/gptme/tools/gh.py
index 4cb02efa..4228003b 100644
--- a/gptme/tools/gh.py
+++ b/gptme/tools/gh.py
@@ -1,45 +1,44 @@
 import shutil
 
-from . import ToolSpec
+from . import ToolSpec, ToolUse
 
 
 def has_gh_tool() -> bool:
     return shutil.which("gh") is not None
 
 
-# Note: this isn't actually a tool, it only serves prompting purposes
-tool = ToolSpec(
-    name="gh",
-    available=has_gh_tool(),
-    desc="Interact with GitHub",
-    instructions="",
-    examples="""Here are examples of how to use the GitHub CLI (gh) to interact with GitHub.
+instructions = "Interact with GitHub via the GitHub CLI (gh)."
 
-> User: create a public repo from the current directory, and push
-Note: --confirm and -y are deprecated, and no longer needed
-```sh
+examples = f"""
+> User: create a public repo from the current directory, and push. Note that --confirm and -y are deprecated, and no longer needed.
+> Assistant:
+{ToolUse("shell", [], '''
 REPO=$(basename $(pwd))
 gh repo create $REPO --public --source . --push
-```
+''').to_output()}
 
 > User: show issues
-```sh
-gh issue list --repo $REPO
-```
+> Assistant:
+{ToolUse("shell", [], "gh issue list --repo $REPO").to_output()}
 
 > User: read issue with comments
-```sh
-gh issue view $ISSUE --repo $REPO --comments
-```
+> Assistant:
+{ToolUse("shell", [], "gh issue view $ISSUE --repo $REPO --comments").to_output()}
 
 > User: show recent workflows
-```sh
-gh run list --status failure --repo $REPO --limit 5
-```
+> Assistant:
+{ToolUse("shell", [], "gh run list --repo $REPO --limit 5").to_output()}
 
 > User: show workflow
-```sh
-gh run view $RUN --repo $REPO --log
-```
-    """,
+> Assistant:
+{ToolUse("shell", [], "gh run view $RUN --repo $REPO --log").to_output()}
+"""
+
+# Note: this isn't actually a tool, it only serves prompting purposes
+tool = ToolSpec(
+    name="gh",
+    available=has_gh_tool(),
+    desc="Interact with GitHub",
+    instructions=instructions,
+    examples=examples,
 )
diff --git a/gptme/tools/patch.py b/gptme/tools/patch.py
index b12b3647..0c46be0e 100644
--- a/gptme/tools/patch.py
+++ b/gptme/tools/patch.py
@@ -12,15 +12,10 @@
 from ..util import ask_execute, print_preview
 from .base import ToolSpec, ToolUse
 
-
-def patch_to_output(filename: str, patch: str) -> str:
-    return ToolUse("patch", [filename], patch.strip()).to_output()
-
-
 instructions = f"""
-To patch/modify files, we can use an adapted version of git conflict markers.
+To patch/modify files, we use an adapted version of git conflict markers.
 
-This can be used to make changes to files, without having to rewrite the whole file.
+This can be used to edit files, without having to rewrite the whole file.
 Only one patch block can be written per codeblock. Extra ORIGINAL/UPDATED blocks will be ignored.
 Try to keep the patch as small as possible. Avoid placeholders, as they may make the patch fail.
 
@@ -29,13 +24,13 @@ def patch_to_output(filename: str, patch: str) -> str:
 
 The patch block should be written in the following format:
 
-{patch_to_output("$FILENAME", '''
+{ToolUse("patch", ["$FILENAME"], '''
 <<<<<<< ORIGINAL
 $ORIGINAL_CONTENT
 =======
 $UPDATED_CONTENT
 >>>>>>> UPDATED
-''')}
+'''.strip()).to_output()}
 """
 
 ORIGINAL = "<<<<<<< ORIGINAL\n"
@@ -45,7 +40,8 @@ def patch_to_output(filename: str, patch: str) -> str:
 
 examples = f"""
 > User: patch the file `hello.py` to ask for the name of the user
-> Assistant: {patch_to_output("hello.py", '''
+> Assistant:
+{ToolUse("patch", ["hello.py"], '''
 <<<<<<< ORIGINAL
 def hello():
     print("Hello world")
@@ -54,7 +50,7 @@ def hello():
     name = input("What is your name? ")
     print(f"Hello {name}")
 >>>>>>> UPDATED
-'''.strip())}
+'''.strip()).to_output()}
 > System: Patch applied
 """
 
diff --git a/gptme/tools/python.py b/gptme/tools/python.py
index 69035049..61f8f689 100644
--- a/gptme/tools/python.py
+++ b/gptme/tools/python.py
@@ -177,17 +177,15 @@ def get_installed_python_libraries() -> set[str]:
 > Assistant:
 {ToolUse("ipython", [], "2 + 2").to_output()}
 > System: Executed code block.
-```result
-4
-```
+{ToolUse("result", [], "4").to_output()}
 
 #### It can write an example and then execute it:
 > User: compute fib 10
 > Assistant: To compute the 10th Fibonacci number, we write a recursive function:
-```python
+{ToolUse("ipython", [], '''
 def fib(n):
     ...
-```
+''').to_output()}
 Now, let's execute this code to get the 10th Fibonacci number:
 {ToolUse("ipython", [], '''
 def fib(n):
@@ -197,9 +195,7 @@ def fib(n):
 fib(10)
 ''').to_output()}
 > System: Executed code block.
-```result
-55
-```
+{ToolUse("result", [], "55").to_output()}
 """.strip()
 
 
diff --git a/gptme/tools/read.py b/gptme/tools/read.py
index 9a939ecc..19ee2fa8 100644
--- a/gptme/tools/read.py
+++ b/gptme/tools/read.py
@@ -6,8 +6,9 @@
 
 instructions = "Read files using `cat`"
 examples = f"""
-> Assistant:
-{ToolUse("bash", [], "cat file.txt").to_output()}
+User: read file.txt
+Assistant:
+{ToolUse("shell", [], "cat file.txt").to_output()}
 """
 
 # Note: this isn't actually a tool, it only serves prompting purposes
diff --git a/gptme/tools/save.py b/gptme/tools/save.py
index 2f6dc14e..192a4896 100644
--- a/gptme/tools/save.py
+++ b/gptme/tools/save.py
@@ -1,5 +1,5 @@
 """
-Gives the assistant the ability to save/write code to a file.
+Gives the assistant the ability to save whole files, or append to them.
 """
 
 from collections.abc import Generator
@@ -12,7 +12,11 @@
 
 # FIXME: this is markdown-specific instructions, thus will confuse the XML mode
 instructions = """
-To write text to a file, use a code block with the language tag set to the path of the file.
+To write to a file, use a code block with the language tag: `save <path>`
+""".strip()
+
+instructions_append = """
+To append to a file, use a code block with the language tag: `append <path>`
 """.strip()
 
 examples = f"""
@@ -25,6 +29,13 @@
 > System: Saved to `hello.py`
 """.strip()
 
+examples_append = f"""
+> User: append a print "Hello world" to hello.py
+> Assistant:
+{ToolUse("append", ["hello.py"], 'print("Hello world")').to_output()}
+> System: Appended to `hello.py`
+""".strip()
+
 
 def execute_save(
     code: str, ask: bool, args: list[str]
@@ -141,17 +152,6 @@ def execute_append(
 )
 __doc__ = tool_save.get_doc(__doc__)
 
-instructions_append = """
-To append text to a file, use a code block with the language: append <filepath>
-""".strip()
-
-examples_append = f"""
-> User: append a print "Hello world" to hello.py
-> Assistant:
-{ToolUse("append", ["hello.py"], 'print("Hello world")').to_output()}
-> System: Appended to `hello.py`
-""".strip()
-
 tool_append = ToolSpec(
     name="append",
     desc="Append text to file",
diff --git a/gptme/tools/shell.py b/gptme/tools/shell.py
index 96d260d7..ebec6c68 100644
--- a/gptme/tools/shell.py
+++ b/gptme/tools/shell.py
@@ -31,7 +31,7 @@ def get_installed_programs() -> set[str]:
         "pacman",
         # common and useful
         "ffmpeg",
-        "convert",
+        "magick",
         "pandoc",
         "git",
         "docker",
@@ -57,7 +57,6 @@ def get_installed_programs() -> set[str]:
 """.strip()
 
 examples = f"""
-
 User: list the current directory
 Assistant: To list the files in the current directory, use `ls`:
 {ToolUse("shell", [], "ls").to_output()}
@@ -92,15 +91,15 @@ def get_installed_programs() -> set[str]:
 Assistant: Sure! Let's create a new vue project with TypeScript and Pinia named fancy-project:
 {ToolUse("shell", [], "npm init vue@latest fancy-project --yes -- --typescript --pinia").to_output()}
 System:
-```output
+{ToolUse("stdout", [], '''
 > npx
 > create-vue
 
 Vue.js - The Progressive JavaScript Framework
 
 Scaffolding project in ./fancy-project...
-```
-""".strip()
+'''.strip()).to_output()}
+"""
 
 
 class ShellSession:
diff --git a/gptme/tools/tmux.py b/gptme/tools/tmux.py
index feca8478..33a0096a 100644
--- a/gptme/tools/tmux.py
+++ b/gptme/tools/tmux.py
@@ -124,9 +124,7 @@ def inspect_pane(pane_id: str) -> Message:
     return Message(
         "system",
         f"""Pane content:
-```output
-{content}
-```""",
+{ToolUse("output", [], content).to_output()}""",
     )
 
 
@@ -212,9 +210,7 @@ def execute_tmux(
 Assistant: Of course! Let's inspect the pane content:
 {ToolUse("tmux", [], "inspect_pane 0").to_output()}
 System:
-```output
-Server is running on localhost:5600
-```
+{ToolUse("output", [], "Server is running on localhost:5600").to_output()}
 
 User: Stop the dev server
 Assistant: I'll send 'Ctrl+C' to the pane to stop the server:
@@ -223,10 +219,10 @@ def execute_tmux(
 
 #### Get info from ncurses applications
 User: start top and give me a summary
+Assistant: Sure! Let's start the top command in a tmux session:
+{ToolUse("tmux", [], "new_session 'top'").to_output()}
 System: Running `top` in session 1.
-```output
-(output from top shown here)
-```
+{ToolUse("output", [], "(output from top shown here)").to_output()}
 Assistant: The load is...
 
 #### Background process
diff --git a/gptme/tools/vision.py b/gptme/tools/vision.py
index 078b7e71..585828f6 100644
--- a/gptme/tools/vision.py
+++ b/gptme/tools/vision.py
@@ -1,3 +1,9 @@
+"""
+Tools for viewing images, giving the assistant vision.
+
+Requires a model which supports vision, such as GPT-4o, Anthropic's Claude, and Llama 3.2.
+"""
+
 from collections.abc import Generator
 from pathlib import Path