diff --git a/gptme/prompts.py b/gptme/prompts.py index e57f0d3c..a811dbcf 100644 --- a/gptme/prompts.py +++ b/gptme/prompts.py @@ -86,13 +86,13 @@ def prompt_gptme(interactive: bool) -> Generator[Message, None, None]: You are designed to help users with programming tasks, such as writing code, debugging and learning new concepts. You can run code, execute terminal commands, and access the filesystem on the local machine. You will help the user with writing code, either from scratch or in existing projects. -You will think step by step when solving a problem, in <thinking> tags. +You will think step by step when solving a problem, in `<thinking>` tags. Break down complex tasks into smaller, manageable steps. You have the ability to self-correct. If you receive feedback that your output or actions were incorrect, you should: - acknowledge the mistake -- analyze what went wrong in <thinking> tags +- analyze what went wrong in `<thinking>` tags - provide a corrected response You should learn about the context needed to provide the best help, @@ -112,7 +112,7 @@ def prompt_gptme(interactive: bool) -> Generator[Message, None, None]: Maintain a professional and efficient communication style. Be concise but thorough in your explanations. -Think before you answer, in <thinking> tags. +Think before you answer, in `<thinking>` tags. 
""".strip() interactive_prompt = """ diff --git a/gptme/tools/computer.py b/gptme/tools/computer.py index 2775e2ab..0fffbc0e 100644 --- a/gptme/tools/computer.py +++ b/gptme/tools/computer.py @@ -133,6 +133,7 @@ def computer( run_xdotool(f"mousedown 1 mousemove --sync {x} {y} mouseup 1", display) print(f"Moved mouse to {x},{y}") + return None elif action in ("key", "type"): if not text: raise ValueError(f"text is required for {action}") @@ -147,6 +148,7 @@ def computer( display, ) print(f"Typed text: {text}") + return None elif action in ("left_click", "right_click", "middle_click", "double_click"): click_arg = { "left_click": "1", @@ -156,6 +158,7 @@ def computer( }[action] run_xdotool(f"click {click_arg}", display) print(f"Performed {action}") + return None elif action == "screenshot": # Use X11-specific screenshot if available, fall back to native output_dir = Path(OUTPUT_DIR) @@ -180,13 +183,14 @@ def computer( return view_image(path) else: print("Error: Screenshot failed") + return None elif action == "cursor_position": output = run_xdotool("getmouselocation --shell", display) x = int(output.split("X=")[1].split("\n")[0]) y = int(output.split("Y=")[1].split("\n")[0]) x, y = scale_coordinates(ScalingSource.COMPUTER, x, y, width, height) print(f"Cursor position: X={x},Y={y}") - + return None raise ValueError(f"Invalid action: {action}") diff --git a/scripts/Dockerfile.computer b/scripts/Dockerfile.computer index 292dc7b5..42c761e2 100644 --- a/scripts/Dockerfile.computer +++ b/scripts/Dockerfile.computer @@ -22,15 +22,25 @@ RUN apt-get update && \ x11vnc \ tint2 \ x11-apps \ - # Tools + # PPA req + software-properties-common \ + # Tools \ make \ git \ tmux \ curl \ pandoc \ netcat-openbsd \ - net-tools \ - && rm -rf /var/lib/apt/lists/* + net-tools + +# Install Firefox +RUN sudo add-apt-repository ppa:mozillateam/ppa && \ + sudo apt-get install -y --no-install-recommends firefox-esr + +RUN apt-get remove -y python3-blinker + +# Clean up +RUN apt-get clean && rm 
-rf /var/lib/apt/lists/* # Install noVNC for web access RUN git clone --branch v1.5.0 https://github.com/novnc/noVNC.git /opt/noVNC && \ diff --git a/scripts/computer_home/entrypoint.sh b/scripts/computer_home/entrypoint.sh index 8102f465..e846843d 100755 --- a/scripts/computer_home/entrypoint.sh +++ b/scripts/computer_home/entrypoint.sh @@ -5,7 +5,7 @@ set -e ./novnc_startup.sh # Start gptme server -python3 -m gptme.server --host 0.0.0.0 --port 8080 --tools python,computer +python3 -m gptme.server --host 0.0.0.0 --port 8080 --tools python,computer --cors-origin '*' # Keep the container running tail -f /dev/null