Skip to content

Commit

Permalink
Non anthropic OS mode
Browse files (browse the repository at this point in the history)
  • Loading branch information
KillianLucas committed Nov 25, 2024
1 parent 7cc404c commit bdfe9aa
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 12 deletions.
95 changes: 84 additions & 11 deletions interpreter_1/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
import os
import platform
import sys
import time
import traceback
import uuid
from datetime import datetime
from typing import Any, cast

from prompt_toolkit import PromptSession
from readchar import readchar

from .misc.get_input import get_input
Expand All @@ -24,6 +24,7 @@
litellm.suppress_debug_info = True
litellm.REPEATED_STREAMING_CHUNK_LIMIT = 99999999
litellm.modify_params = True
# litellm.drop_params = True

from anthropic import Anthropic
from anthropic.types.beta import (
Expand Down Expand Up @@ -245,6 +246,10 @@ async def async_respond(self):
provider = self.provider # Keep existing provider if set
max_tokens = self.max_tokens # Keep existing max_tokens if set

if self.model == "claude-3-5-sonnet-latest":
# LiteLLM cannot find model info for the "claude-3-5-sonnet-latest" alias, so set the provider explicitly
provider = "anthropic"

# Only try to get model info if we need either provider or max_tokens
if provider is None or max_tokens is None:
try:
Expand Down Expand Up @@ -610,8 +615,53 @@ async def async_respond(self):
}
)
if "gui" in self.tools:
print("\nGUI is not supported for non-Anthropic models yet.\n")
pass
tools.append(
{
"type": "function",
"function": {
"name": "computer",
"description": """Control the computer's mouse, keyboard and screen interactions
* Coordinates are scaled to standard resolutions (max 1366x768)
* Screenshots are automatically taken after most actions
* For key commands, use normalized key names (e.g. 'pagedown' -> 'pgdn', 'enter'/'return' are interchangeable)
* On macOS, 'super+' is automatically converted to 'command+'
* Mouse movements use smooth easing for natural motion""",
"parameters": {
"type": "object",
"properties": {
"action": {
"type": "string",
"description": "The action to perform",
"enum": [
"key", # Send keyboard input (hotkeys or single keys)
"type", # Type text with a slight delay between characters
"mouse_move", # Move mouse cursor to coordinates
"left_click", # Perform left mouse click
"left_click_drag", # Click and drag from current pos to coordinates
"right_click", # Perform right mouse click
"middle_click", # Perform middle mouse click
"double_click", # Perform double left click
"screenshot", # Take a screenshot
"cursor_position", # Get current cursor coordinates
],
},
"text": {
"type": "string",
"description": "Text to type or key command to send (required for 'key' and 'type' actions)",
},
"coordinate": {
"type": "array",
"description": "X,Y coordinates for mouse actions (required for 'mouse_move' and 'left_click_drag')",
"items": {"type": "integer"},
"minItems": 2,
"maxItems": 2,
},
},
"required": ["action"],
},
},
}
)

if self.model.startswith("ollama/"):
# Fix ollama
Expand Down Expand Up @@ -645,6 +695,7 @@ async def async_respond(self):
"temperature": self.temperature,
"api_key": self.api_key,
"api_version": self.api_version,
"parallel_tool_calls": False,
}

if self.tool_calling:
Expand All @@ -658,7 +709,6 @@ async def async_respond(self):

if self.debug:
print("Sending request...", params)
import time

time.sleep(3)

Expand Down Expand Up @@ -815,13 +865,36 @@ async def async_respond(self):
result = ToolResult(output="Tool execution cancelled by user")

if self.tool_calling:
self.messages.append(
{
"role": "tool",
"content": json.dumps(dataclasses.asdict(result)),
"tool_call_id": tool_call.id,
}
)
if result.base64_image:
# Send a text-only placeholder as the tool result, then attach the image in a follow-up user message (base64 PNG data URL)
self.messages.append(
{
"role": "tool",
"content": "The user will reply with the image outputted by the tool.",
"tool_call_id": tool_call.id,
}
)
self.messages.append(
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{result.base64_image}",
},
}
],
}
)
else:
self.messages.append(
{
"role": "tool",
"content": json.dumps(dataclasses.asdict(result)),
"tool_call_id": tool_call.id,
}
)
else:
self.messages.append(
{
Expand Down
4 changes: 3 additions & 1 deletion interpreter_1/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def __init__(self):
# Default values if no profile exists
# Model configuration
self.model = "claude-3-5-sonnet-latest" # The LLM model to use
self.provider = "anthropic" # The model provider (e.g. anthropic, openai) None will auto-detect
self.provider = (
None # The model provider (e.g. anthropic, openai) None will auto-detect
)
self.temperature = 0 # Sampling temperature for model outputs (0-1)
self.max_tokens = None # Maximum tokens in a message

Expand Down

0 comments on commit bdfe9aa

Please sign in to comment.