From d54df51d8ea05711258f67a8cc5c4c0b2efc4608 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= <erik@bjareho.lt>
Date: Tue, 15 Oct 2024 11:59:30 +0200
Subject: [PATCH] fix: more fixes/improvements to treeofthoughts.py

---
 gptme/chat.py             |  19 ++--
 scripts/treeofthoughts.py | 194 +++++++++++++++++++++++++++++++-------
 2 files changed, 169 insertions(+), 44 deletions(-)

diff --git a/gptme/chat.py b/gptme/chat.py
index e4008d26..74cffef7 100644
--- a/gptme/chat.py
+++ b/gptme/chat.py
@@ -106,7 +106,9 @@ def chat(
                 while True:
                     set_interruptible()
                     try:
-                        response_msgs = list(step(manager, no_confirm, stream=stream))
+                        response_msgs = list(
+                            step(manager.log, no_confirm, stream=stream)
+                        )
                     except KeyboardInterrupt:
                         console.log("Interrupted. Stopping current execution.")
                         manager.append(Message("system", "Interrupted"))
@@ -151,7 +153,7 @@ def chat(
 
         # ask for input if no prompt, generate reply, and run tools
         clear_interruptible()  # Ensure we're not interruptible during user input
-        for msg in step(manager, no_confirm, stream=stream):  # pragma: no cover
+        for msg in step(manager.log, no_confirm, stream=stream):  # pragma: no cover
             manager.append(msg)
             # run any user-commands, if msg is from user
             if msg.role == "user" and execute_cmd(msg, manager):
@@ -159,13 +161,13 @@ def chat(
 
 
 def step(
-    log: Log | LogManager,
+    log: Log | list[Message],
     no_confirm: bool,
     stream: bool = True,
 ) -> Generator[Message, None, None]:
     """Runs a single pass of the chat."""
-    if isinstance(log, LogManager):
-        log = log.log
+    if isinstance(log, list):
+        log = Log(log)
 
     # If last message was a response, ask for input.
     # If last message was from the user (such as from crash/edited log),
@@ -179,9 +181,6 @@ def step(
         or not any(role == "user" for role in [m.role for m in log])
     ):  # pragma: no cover
         inquiry = prompt_user()
-        if not inquiry:
-            # Empty command, ask for input again
-            return
         msg = Message("user", inquiry, quiet=True)
         msg = _include_paths(msg)
         yield msg
@@ -215,7 +214,9 @@ def prompt_user(value=None) -> str:  # pragma: no cover
     termios.tcflush(sys.stdin, termios.TCIFLUSH)
     set_interruptible()
     try:
-        response = prompt_input(PROMPT_USER, value)
+        response = ""
+        while not response:
+            response = prompt_input(PROMPT_USER, value)
     except KeyboardInterrupt:
         print("\nInterrupted. Press Ctrl-D to exit.")
         return ""
diff --git a/scripts/treeofthoughts.py b/scripts/treeofthoughts.py
index fdb01233..74b6d367 100644
--- a/scripts/treeofthoughts.py
+++ b/scripts/treeofthoughts.py
@@ -2,8 +2,11 @@
 Tree-branching conversations for gptme with branch evaluation/prediction.
 
 The idea is to evaluate if we are on the right track by checking if the current branch is "good"/making progress, and otherwise backtracking to the last good branch and trying a different prompt/approach.
+
+The goal is to have a more autonomous agent which can self-supervise and make several branching attempts to find the right path to the solution.
 """
 
+import subprocess
 import sys
 from typing import Literal
 
@@ -17,6 +20,40 @@
 EvalAction = Literal["continue", "undo", "done"]
 
 
+def project_files() -> list[str]:
+    # Returns the paths of all files tracked by git (output of `git ls-files`)
+    p = subprocess.run(["git", "ls-files"], capture_output=True, text=True)
+    return p.stdout.splitlines()
+
+
+def changed_files() -> list[str]:
+    # Returns the paths of tracked files that differ from HEAD (staged or unstaged changes)
+    p = subprocess.run(
+        ["git", "diff", "--name-only", "HEAD"], capture_output=True, text=True
+    )
+    return p.stdout.splitlines()
+
+
+def unstaged_files() -> list[str]:
+    # Returns the paths of tracked files with unstaged changes (working tree vs. index)
+    p = subprocess.run(["git", "diff", "--name-only"], capture_output=True, text=True)
+    return p.stdout.splitlines()
+
+
+def context_from_files(files: list[str]) -> str:
+    # Returns each file's contents in a code block labelled with its path (a placeholder is used if it can't be decoded as text)
+    context = ""
+    for f in files:
+        context += f"```{f}\n"
+        with open(f) as file:
+            try:
+                context += file.read()
+            except UnicodeDecodeError:
+                context += "<binary file>"
+        context += "\n```\n"
+    return context
+
+
 def step(log: Log) -> Log:
     # Steps the conversation forward
     for msg in _step(log, no_confirm=True):
@@ -26,7 +63,6 @@ def step(log: Log) -> Log:
 
 def recommendation(log: Log) -> EvalAction:
     # Returns a LLM-guided recommendation for the next action
-    # Can be: undo (backtrack), restart, continue,
     system_msg = Message(
         "system",
         """
@@ -50,44 +86,132 @@ def recommendation(log: Log) -> EvalAction:
         + [Message("system", log_xml)]
         + [Message("user", "evaluate the agent")]
     )
-    log = step(log)  # TODO: use faster model for this
+    log = step(log)
     parser = etree.HTMLParser()
     tree = etree.fromstring(log[-1].content, parser)
     return tree.xpath("//action")[0].text
 
 
-print("Init...")
-init(
-    model="openai/gpt-4o",
-    interactive=False,
-    tool_allowlist=["python", "shell", "save", "patch"],
-)
+def lint_format(log: Log) -> Log:
+    # Runs "make format"; on failure, appends its output and the mentioned unstaged files to the log
+    p = subprocess.run(["make", "format"], capture_output=True, text=True)
+    if p.returncode == 0:
+        return log
 
-# Set up the conversation
-prompt = sys.argv[1] if len(sys.argv) > 1 else "What is fib(10)?"
-prompts = [Message("user", prompt)]
-initial_msgs = [get_prompt("full", interactive=False)]
-log = Log(initial_msgs + prompts)
+    changed_files = [f for f in unstaged_files() if f in p.stdout or f in p.stderr]
+    files_str = f"""Files:
+{context_from_files(changed_files)}
+"""
 
-while True:
-    # Step it forward
-    print("Stepping...")
-    log = step(log)
-    print("Done with step")
-
-    # Evaluate the conversation
-    action = recommendation(log)
-    print(f"Recommendation: {action}")
-
-    # Take the recommended action
-    if action == "continue":
-        continue
-    elif action == "undo":
-        log = log.pop()
-    elif action == "done":
-        break
-    else:
-        raise ValueError(f"Invalid action: {action}")
-
-# Print the final conversation
-log.print()
+    system_msg = Message(
+        "system",
+        f"""
+Linting and formatting the code with "make format"...
+
+stdout:
+{p.stdout}
+
+stderr:
+{p.stderr}
+
+{files_str}
+""".strip(),
+    )
+    log = log.append(system_msg)
+    return log
+
+
+def typecheck(log: Log) -> Log:
+    # Typechecks the code with "make typecheck"; on failure, appends its output to the log
+    p = subprocess.run(["make", "typecheck"], capture_output=True, text=True)
+    if p.returncode == 0:
+        return log
+
+    system_msg = Message(
+        "system",
+        f"""
+Typechecking the code with "make typecheck"...
+
+stdout:
+{p.stdout}
+
+stderr:
+{p.stderr}
+""",
+    )
+    log = log.append(system_msg)
+    return log
+
+
+context_header = "Context:\n\n"
+
+
+def gather_context() -> Message:
+    # Dynamically gather context from changed files
+    files = changed_files()
+    return Message("system", context_header + context_from_files(files))
+
+
+def update_context(log: Log) -> Log:
+    # Drop every existing context message, then append a freshly gathered one
+    msgs = [msg for msg in log if not msg.content.startswith(context_header)]
+    return Log(msgs + [gather_context()])
+
+
+def main():
+    print("Initializing the autonomous agent...")
+    init(
+        model="openai/gpt-4o",
+        interactive=False,
+        tool_allowlist=["python", "shell", "save", "patch"],
+    )
+
+    # Set up the conversation
+    prompt = sys.argv[1] if len(sys.argv) > 1 else "What is fib(10)?"
+    prompts = [Message("user", prompt)]
+    initial_msgs = [get_prompt("full", interactive=False)]
+    log = Log(initial_msgs + prompts)
+
+    # Main loop for autonomous operation
+    while True:
+        # Gather and update context
+        log = update_context(log)
+        print("Context updated.")
+
+        # Step the conversation forward
+        log = step(log)
+        print("Conversation stepped forward.")
+
+        # Check for changes in the project files
+        if (
+            subprocess.run(
+                ["git", "diff", "--exit-code"], capture_output=True
+            ).returncode
+            != 0
+        ):
+            print("Changes detected, performing lint and typecheck.")
+            log = lint_format(log)
+            log = typecheck(log)
+
+        # Get recommendation for next action
+        action = recommendation(log)
+        print(f"Recommended action: {action}")
+
+        # Execute the recommended action
+        if action == "continue":
+            continue
+        elif action == "undo":
+            log = log.pop()
+            print("Undoing last step.")
+        elif action == "done":
+            print("Task completed successfully.")
+            break
+        else:
+            print(f"Unexpected action: {action}")
+            break
+
+    print("Exiting")
+
+
+if __name__ == "__main__":
+    main()