From d54df51d8ea05711258f67a8cc5c4c0b2efc4608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Tue, 15 Oct 2024 11:59:30 +0200 Subject: [PATCH] fix: more fixes/improvements to treeofthoughts.py --- gptme/chat.py | 19 ++-- scripts/treeofthoughts.py | 194 +++++++++++++++++++++++++++++++------- 2 files changed, 169 insertions(+), 44 deletions(-) diff --git a/gptme/chat.py b/gptme/chat.py index e4008d26..74cffef7 100644 --- a/gptme/chat.py +++ b/gptme/chat.py @@ -106,7 +106,9 @@ def chat( while True: set_interruptible() try: - response_msgs = list(step(manager, no_confirm, stream=stream)) + response_msgs = list( + step(manager.log, no_confirm, stream=stream) + ) except KeyboardInterrupt: console.log("Interrupted. Stopping current execution.") manager.append(Message("system", "Interrupted")) @@ -151,7 +153,7 @@ def chat( # ask for input if no prompt, generate reply, and run tools clear_interruptible() # Ensure we're not interruptible during user input - for msg in step(manager, no_confirm, stream=stream): # pragma: no cover + for msg in step(manager.log, no_confirm, stream=stream): # pragma: no cover manager.append(msg) # run any user-commands, if msg is from user if msg.role == "user" and execute_cmd(msg, manager): @@ -159,13 +161,13 @@ def chat( def step( - log: Log | LogManager, + log: Log | list[Message], no_confirm: bool, stream: bool = True, ) -> Generator[Message, None, None]: """Runs a single pass of the chat.""" - if isinstance(log, LogManager): - log = log.log + if isinstance(log, list): + log = Log(log) # If last message was a response, ask for input. # If last message was from the user (such as from crash/edited log), @@ -179,9 +181,6 @@ def step( or not any(role == "user" for role in [m.role for m in log]) ): # pragma: no cover inquiry = prompt_user() - if not inquiry: - # Empty command, ask for input again - return msg = Message("user", inquiry, quiet=True) msg = _include_paths(msg) yield msg @@ -215,7 +214,9 @@ def prompt_user(value=None) -> str: # pragma: no cover termios.tcflush(sys.stdin, termios.TCIFLUSH) set_interruptible() try: - response = prompt_input(PROMPT_USER, value) + response = "" + while not response: + response = prompt_input(PROMPT_USER, value) except KeyboardInterrupt: print("\nInterrupted. Press Ctrl-D to exit.") return "" diff --git a/scripts/treeofthoughts.py b/scripts/treeofthoughts.py index fdb01233..74b6d367 100644 --- a/scripts/treeofthoughts.py +++ b/scripts/treeofthoughts.py @@ -2,8 +2,11 @@ Tree-branching conversations for gptme with branch evaluation/prediction. The idea is to evaluate if we are on the right track by checking if the current branch is "good"/making progress, and otherwise backtracking to the last good branch and trying a different prompt/approach. + +The goal is to have a more autonomous agent which can self-supervise and make several branching attempts to find the right path to the solution. """ +import subprocess import sys from typing import Literal @@ -17,6 +20,40 @@ EvalAction = Literal["continue", "undo", "done"] +def project_files() -> list[str]: + # Returns a list of files in the project + p = subprocess.run(["git", "ls-files"], capture_output=True, text=True) + return p.stdout.splitlines() + + +def changed_files() -> list[str]: + # Returns a list of changed files in the project + p = subprocess.run( + ["git", "diff", "--name-only", "HEAD"], capture_output=True, text=True + ) + return p.stdout.splitlines() + + +def unstaged_files() -> list[str]: + # Returns a list of unstaged files in the project + p = subprocess.run(["git", "diff", "--name-only"], capture_output=True, text=True) + return p.stdout.splitlines() + + +def context_from_files(files: list[str]) -> str: + # Returns the context from the files + context = "" + for f in files: + context += f"```{f}\n" + with open(f) as file: + try: + context += file.read() + except UnicodeDecodeError: + context += "" + context += "\n```\n" + return context + + def step(log: Log) -> Log: # Steps the conversation forward for msg in _step(log, no_confirm=True): @@ -26,7 +63,6 @@ def step(log: Log) -> Log: def recommendation(log: Log) -> EvalAction: # Returns a LLM-guided recommendation for the next action - # Can be: undo (backtrack), restart, continue, system_msg = Message( "system", """ @@ -50,44 +86,132 @@ def recommendation(log: Log) -> EvalAction: + [Message("system", log_xml)] + [Message("user", "evaluate the agent")] ) - log = step(log) # TODO: use faster model for this + log = step(log) parser = etree.HTMLParser() tree = etree.fromstring(log[-1].content, parser) return tree.xpath("//action")[0].text -print("Init...") -init( - model="openai/gpt-4o", - interactive=False, - tool_allowlist=["python", "shell", "save", "patch"], -) +def lint_format(log: Log) -> Log: + # Lint, format, and fix the conversation by calling "make format" + p = subprocess.run(["make", "format"], capture_output=True, text=True) + if p.returncode == 0: + return log -# Set up the conversation -prompt = sys.argv[1] if len(sys.argv) > 1 else "What is fib(10)?" -prompts = [Message("user", prompt)] -initial_msgs = [get_prompt("full", interactive=False)] -log = Log(initial_msgs + prompts) + changed_files = [f for f in unstaged_files() if f in p.stdout or f in p.stderr] + files_str = f"""Files: +{context_from_files(changed_files)} +""" -while True: - # Step it forward - print("Stepping...") - log = step(log) - print("Done with step") - - # Evaluate the conversation - action = recommendation(log) - print(f"Recommendation: {action}") - - # Take the recommended action - if action == "continue": - continue - elif action == "undo": - log = log.pop() - elif action == "done": - break - else: - raise ValueError(f"Invalid action: {action}") - -# Print the final conversation -log.print() + system_msg = Message( + "system", + f""" +Linting and formatting the code with "make format"... + +stdout: +{p.stdout} + +stderr: +{p.stderr} + +{files_str} +""".strip(), + ) + log = log.append(system_msg) + return log + + +def typecheck(log: Log) -> Log: + # Typecheck the code by calling "make typecheck" + p = subprocess.run(["make", "typecheck"], capture_output=True, text=True) + if p.returncode == 0: + return log + + system_msg = Message( + "system", + f""" +Typechecking the code with "make typecheck"... + +stdout: +{p.stdout} + +stderr: +{p.stderr} +""", + ) + log = log.append(system_msg) + return log + + +context_header = "Context:\n\n" + + +def gather_context() -> Message: + # Dynamically gather context from changed files + files = changed_files() + return Message("system", context_header + context_from_files(files)) + + +def update_context(log: Log) -> Log: + # remove the last context message + msgs = [msg for msg in log if not msg.content.startswith(context_header)] + return Log(msgs + [gather_context()]) + + +def main(): + print("Initializing the autonomous agent...") + init( + model="openai/gpt-4o", + interactive=False, + tool_allowlist=["python", "shell", "save", "patch"], + ) + + # Set up the conversation + prompt = sys.argv[1] if len(sys.argv) > 1 else "What is fib(10)?" + prompts = [Message("user", prompt)] + initial_msgs = [get_prompt("full", interactive=False)] + log = Log(initial_msgs + prompts) + + # Main loop for autonomous operation + while True: + # Gather and update context + log = update_context(log) + print("Context updated.") + + # Step the conversation forward + log = step(log) + print("Conversation stepped forward.") + + # Check for changes in the project files + if ( + subprocess.run( + ["git", "diff", "--exit-code"], capture_output=True + ).returncode + != 0 + ): + print("Changes detected, performing lint and typecheck.") + log = lint_format(log) + log = typecheck(log) + + # Get recommendation for next action + action = recommendation(log) + print(f"Recommended action: {action}") + + # Execute the recommended action + if action == "continue": + continue + elif action == "undo": + log = log.pop() + print("Undoing last step.") + elif action == "done": + print("Task completed successfully.") + break + else: + print(f"Unexpected action: {action}") + break + + print("Exiting") + + +if __name__ == "__main__": + main()