Integrate benchmark and autogpt (#5208)
Signed-off-by: Merwane Hamadi <[email protected]>
waynehamadi authored Sep 13, 2023
1 parent 69dadee commit b2fc8f2
Showing 5 changed files with 20 additions and 22 deletions.
2 changes: 1 addition & 1 deletion autogpts/autogpt/agbenchmark_config/config.json
@@ -1 +1 @@
-{"workspace": "auto_gpt_workspace", "entry_path": "agbenchmark.benchmarks"}
+{"workspace": {"input": "auto_gpt_workspace", "output": "auto_gpt_workspace"}, "entry_path": "agbenchmark.benchmarks"}
3 changes: 3 additions & 0 deletions autogpts/autogpt/challenges_already_beaten.json
@@ -0,0 +1,3 @@
+{
+  "TestWriteFile": true
+}
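
This new file appears to record which challenges the agent has already beaten, keyed by test name. A sketch of how a runner might consult it to skip known-passing challenges; the helper below is hypothetical, not part of this commit:

import json
from pathlib import Path

def is_already_beaten(challenge: str, record: Path) -> bool:
    """Hypothetical helper: True if the record file marks the challenge as beaten."""
    if not record.exists():
        return False
    return json.loads(record.read_text()).get(challenge, False)

print(is_already_beaten("TestWriteFile", Path("autogpts/autogpt/challenges_already_beaten.json")))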
8 changes: 2 additions & 6 deletions benchmark/agbenchmark/agent_interface.py
@@ -75,19 +75,15 @@ def run_windows_env(process: Any, start_time: float, timeout: float) -> None:


def run_agent(task: str, timeout: int, agent_config: AgentBenchmarkConfig) -> None:
-    """Calling to get a response"""
+    print(f"Running agbenchmark/benchmarks.py with timeout {timeout}")

-    entry_path = agent_config.get_agent_entry_path()
-    print(f"Running '{entry_path}' with timeout {timeout}")

-    command = [sys.executable, entry_path, str(task)]
+    command = [sys.executable, "-m", "agbenchmark_config.benchmarks", str(task)]

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        cwd=agent_config.get_agent_directory(),
        bufsize=1,
    )

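Instead of executing a configured entry-path script, the benchmark now runs the agent's benchmarks module via `python -m` from the agent's directory. A standalone sketch of the equivalent invocation; the agent directory and task string are placeholders:

import subprocess
import sys

agent_dir = "autogpts/autogpt"  # placeholder; the real code uses agent_config.get_agent_directory()
task = "Write 'Hello World' to a file"  # placeholder task

# `python -m` prepends the working directory to sys.path, so running with
# cwd=agent_dir lets Python locate the agbenchmark_config package there.
process = subprocess.Popen(
    [sys.executable, "-m", "agbenchmark_config.benchmarks", task],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    universal_newlines=True,
    cwd=agent_dir,
    bufsize=1,
)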
23 changes: 10 additions & 13 deletions benchmark/agbenchmark/conftest.py
@@ -53,7 +53,7 @@ def load_config_from_request(request: Any) -> AgentBenchmarkConfig:
        raise


-def resolve_workspace(workspace: Path) -> Path:
+def resolve_workspace_path(workspace: Path) -> Path:
    """
    This function resolves the workspace path.
@@ -83,10 +83,10 @@ def resolve_workspace(workspace: Path) -> Path:
            return path_value
        else:
            raise ValueError("Invalid workspace path expression.")
-    elif isinstance(workspace, Path):
-        return os.path.abspath(workspace)
    elif isinstance(workspace, str):
        return os.path.abspath(Path.cwd() / workspace)
    else:
-        raise ValueError("Invalid workspace type. Expected str or Path.")
+        raise ValueError("Invalid workspace type. Expected str")


@pytest.fixture(scope="module")
@@ -119,15 +119,12 @@ def config(request: Any) -> Any:

config["AgentBenchmarkConfig"] = agent_benchmark_config

if isinstance(config["workspace"], str):
config["workspace"] = resolve_workspace(agent_benchmark_config.workspace)
else: # it's a input output dict
config["workspace"]["input"] = resolve_workspace(
agent_benchmark_config.workspace / "input"
)
config["workspace"]["output"] = resolve_workspace(
agent_benchmark_config.workspace / "output"
)
config["workspace"]["input"] = resolve_workspace_path(
agent_benchmark_config.workspace.input
)
config["workspace"]["output"] = resolve_workspace_path(
agent_benchmark_config.workspace.output
)

return config

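With the string-or-dict branch removed, the fixture now always resolves both paths through the renamed helper. A simplified sketch of the resolution semantics for plain strings (the `${...}` expression branch shown above is omitted):

import os
from pathlib import Path

def resolve_workspace_path(workspace: str) -> str:
    # Simplified: plain strings resolve relative to the current working
    # directory; the environment-variable expression branch is left out.
    if isinstance(workspace, str):
        return os.path.abspath(Path.cwd() / workspace)
    raise ValueError("Invalid workspace type. Expected str")

print(resolve_workspace_path("auto_gpt_workspace"))  # e.g. /current/dir/auto_gpt_workspace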
6 changes: 4 additions & 2 deletions benchmark/agbenchmark/utils/data_types.py
@@ -17,7 +17,9 @@ class DifficultyLevel(Enum):
    expert = "expert"
    human = "human"


+class Workspace(BaseModel):
+    input: str
+    output: str

# map from enum to difficulty level (numeric)
DIFFICULTY_MAP = {
    DifficultyLevel.interface: 1,
@@ -85,7 +87,7 @@ class AgentBenchmarkConfig(BaseModel):

    agent_benchmark_config_path: Path | None = None
    entry_path: str
-    workspace: Path
+    workspace: Workspace
    reports_folder: Path | None = None
    api_mode: bool = False
    host: str | None
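The new Workspace model makes the input/output split explicit at the type level, so a legacy single-string workspace now fails validation. A minimal sketch, assuming Pydantic v1-style models as used by the repository at the time:

from pydantic import BaseModel, ValidationError

class Workspace(BaseModel):
    input: str
    output: str

# The new dict shape validates:
ws = Workspace(input="auto_gpt_workspace", output="auto_gpt_workspace")
print(ws.input, ws.output)

# A legacy single-string workspace is rejected:
try:
    Workspace.parse_obj("auto_gpt_workspace")
except ValidationError as err:
    print("legacy string rejected:", err.errors()[0]["msg"])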
