feat: add magic @log workspace for ./logdir/workspace, use it for evals
ErikBjare committed Aug 26, 2024
1 parent d914810 commit 9b9d942
Showing 6 changed files with 81 additions and 32 deletions.
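The user-visible change is the new magic value for `--workspace`; the eval harness exercises the same path programmatically. A minimal sketch of that call, mirroring the invocation added to gptme/eval/agents.py below (the conversation name and model id are placeholders):

```python
from gptme import Message, chat, get_prompt

# Passing workspace="@log" makes the chat loop create <logdir>/workspace
# and chdir into it before handling the prompt messages.
chat(
    [Message("user", "write a hello-world script")],  # prompt messages to run
    [get_prompt()],                                    # initial system messages
    name="example-session",   # placeholder conversation name
    model="openai/gpt-4o",    # placeholder model id
    no_confirm=True,
    interactive=False,
    workspace="@log",
)
```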
gptme.toml (2 changes: 1 addition & 1 deletion)
@@ -1 +1 @@
files = ["README.md", "gptme/cli.py", "docs/*.rst", "docs/*.md"]
files = ["README.md", "Makefile", "gptme/cli.py", "docs/*.rst", "docs/*.md"]
gptme/cli.py (36 changes: 28 additions & 8 deletions)
@@ -114,7 +114,7 @@
)
@click.option(
"--workspace",
help="Path to workspace directory.",
help="Path to workspace directory. Pass '@log' to create a workspace in the log directory.",
default=".",
)
def main(
@@ -153,13 +153,8 @@ def main(
if no_confirm:
logger.warning("Skipping all confirmation prompts.")

workspace_prompt = get_workspace_prompt(workspace)

# get initial system prompt
initial_msgs = [get_prompt(prompt_system)]
initial_msgs[
0
].content += f"\n\nSelected project files, read more with cat: {workspace_prompt}"

# if stdin is not a tty, we're getting piped input, which we should include in the prompt
if not sys.stdin.isatty():
@@ -196,6 +191,7 @@ def main(
no_confirm,
interactive,
show_hidden,
workspace,
)


@@ -208,12 +204,14 @@ def chat(
no_confirm: bool = False,
interactive: bool = True,
show_hidden: bool = False,
workspace: str = ".",
):
"""
Run the chat loop.
prompt_msgs: list of messages to execute in sequence.
initial_msgs: list of history messages.
workspace: path to workspace directory, or @log to create one in the log directory.
Callable from other modules.
"""
@@ -227,6 +225,28 @@
print(f"Using logdir {logfile.parent}")
log = LogManager.load(logfile, initial_msgs=initial_msgs, show_hidden=show_hidden)

# change to workspace directory
# use if exists, create if @log, or use given path
if (logfile.parent / "workspace").exists():
assert workspace in ["@log", "."], "Workspace already exists"
workspace_path = logfile.parent / "workspace"
print(f"Using workspace at {workspace_path}")
elif workspace == "@log":
workspace_path = logfile.parent / "workspace"
print(f"Creating workspace at {workspace_path}")
os.makedirs(workspace_path, exist_ok=True)
else:
workspace_path = Path(workspace)
assert (
workspace_path.exists()
), f"Workspace path {workspace_path} does not exist"
os.chdir(workspace_path)

# check if workspace already exists
workspace_prompt = get_workspace_prompt(str(workspace_path))
if workspace_prompt:
log.append(Message("system", workspace_prompt))

# print log
log.print()
print("--- ^^^ past messages ^^^ ---")
@@ -388,8 +408,8 @@ def get_logfile(name: str | Literal["random", "resume"], interactive=True) -> Pa
for f in prev_conv_files
]

# don't run pick in tests/non-interactive mode
if interactive:
# don't run pick in tests/non-interactive mode, or if the user specifies a name
if interactive and name not in ["random", "ask"]:
options = [
NEW_CONV,
] + prev_convs
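The block added to chat() above resolves the workspace in three branches: reuse an existing <logdir>/workspace, create it when "@log" is passed, or use an explicit path that must already exist. A standalone sketch of that branching (a hypothetical helper, not a function in the commit):

```python
import os
from pathlib import Path


def resolve_workspace(workspace: str, logdir: Path) -> Path:
    """Sketch of the branching added to chat(): pick the directory to chdir into."""
    log_workspace = logdir / "workspace"
    if log_workspace.exists():
        # A workspace was already created for this conversation (e.g. on resume);
        # only the default "." or "@log" are accepted so it is not silently ignored.
        assert workspace in ["@log", "."], "Workspace already exists"
        return log_workspace
    if workspace == "@log":
        os.makedirs(log_workspace, exist_ok=True)
        return log_workspace
    path = Path(workspace)
    assert path.exists(), f"Workspace path {path} does not exist"
    return path
```

chat() then does os.chdir(workspace_path) and, if get_workspace_prompt returns anything, appends it to the log as a system message.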
gptme/config.py (6 changes: 1 addition & 5 deletions)
@@ -121,10 +121,6 @@ def set_config_value(key: str, value: str) -> None:


def get_workspace_prompt(workspace: str) -> str:
if not os.path.exists(workspace):
logger.error(f"Workspace directory {workspace} does not exist")
exit(1)
os.chdir(workspace)
project_config_paths = [
p
for p in (
@@ -148,7 +144,7 @@ def get_workspace_prompt(workspace: str) -> str:
f"File {file} specified in project config does not exist"
)
exit(1)
return "\n\nSelected project files, read more with cat:\n" + "\n".join(
return "\n\nSelected project files, read more with cat:\n" + "\n\n".join(
[f"```{Path(file).name}\n{Path(file).read_text()}\n```" for file in files]
)
return ""
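With the existence check and chdir moved into chat(), get_workspace_prompt is left with reading the files listed in the project's gptme.toml and inlining them into a system message, now separated by blank lines. A rough sketch of the string it builds, with the resolved file list hard-coded as a stand-in (assumes it is run from a checkout that contains those files):

```python
from pathlib import Path

FENCE = "`" * 3  # markdown code fence, spelled out so this example stays readable

# Stand-in for the files resolved from the `files` globs in gptme.toml.
files = ["README.md", "Makefile"]

# After this commit the per-file blocks are joined with "\n\n" instead of "\n".
workspace_prompt = "\n\nSelected project files, read more with cat:\n" + "\n\n".join(
    f"{FENCE}{Path(f).name}\n{Path(f).read_text()}\n{FENCE}" for f in files
)
print(workspace_prompt)
```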
gptme/eval/agents.py (21 changes: 16 additions & 5 deletions)
@@ -1,13 +1,16 @@
import os
import logging
from abc import abstractmethod

from gptme import Message
from gptme import chat as gptme_chat
from gptme import get_prompt
from gptme.cli import get_name

from .filestore import FileStore
from .types import Files

logger = logging.getLogger(__name__)


class Agent:
def __init__(self, model: str):
@@ -23,14 +26,21 @@ def act(self, files: Files | None, prompt: str) -> Files:

class GPTMe(Agent):
def act(self, files: Files | None, prompt: str):
store = FileStore()
os.chdir(store.working_dir) # can now modify store content
_id = abs(hash(prompt)) % 1000000
name = f"gptme-evals-{self.model.replace('/', '--')}-{_id}"
logdir = get_name(name)
workspace_dir = logdir / "workspace"
if workspace_dir.exists():
raise FileExistsError(
f"Workspace directory {workspace_dir} already exists. "
)

store = FileStore(working_dir=workspace_dir)
if files:
store.upload(files)

print("\n--- Start of generation ---")
print(f"Working in {store.working_dir}")
logger.debug(f"Working in {store.working_dir}")
prompt_sys = get_prompt()
prompt_sys.content += (
"\n\nIf you have trouble and dont seem to make progress, stop trying."
@@ -40,10 +50,11 @@ def act(self, files: Files | None, prompt: str):
gptme_chat(
[Message("user", prompt)],
[prompt_sys],
f"gptme-evals-{store.id}",
name=name,
model=self.model,
no_confirm=True,
interactive=False,
workspace="@log", # this will be the same directory as workspace_dir
)
# don't exit on sys.exit()
except (SystemExit, KeyboardInterrupt):
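The eval agent now derives the conversation name, and therefore the log directory and workspace, from the model and the prompt, so the FileStore directory and the workspace="@log" directory coincide. A sketch of just the naming part, using the get_name helper imported in the diff; the model id and prompt are placeholders:

```python
from gptme.cli import get_name

model = "openai/gpt-4o"                # placeholder model id
prompt = "write a fibonacci function"  # placeholder eval prompt

# Id derived from the prompt; note that Python salts str hashes per process
# (PYTHONHASHSEED), so this value is only stable within a single run.
_id = abs(hash(prompt)) % 1000000
name = f"gptme-evals-{model.replace('/', '--')}-{_id}"

logdir = get_name(name)               # resolves the log directory for this name
workspace_dir = logdir / "workspace"  # created later by chat() via workspace="@log"
print(workspace_dir)
```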
gptme/eval/filestore.py (8 changes: 5 additions & 3 deletions)
@@ -6,10 +6,12 @@


class FileStore:
def __init__(self):
self.working_dir = Path(tempfile.mkdtemp(prefix="gptme-evals-"))
def __init__(self, working_dir: Path | None = None):
if working_dir:
self.working_dir = working_dir
else:
self.working_dir = Path(tempfile.mkdtemp(prefix="gptme-evals-"))
self.working_dir.mkdir(parents=True, exist_ok=True)
self.id = self.working_dir.name.split("-")[-1]

def upload(self, files: Files):
for name, content in files.items():
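FileStore keeps its old default (a fresh gptme-evals-* temp directory) but can now be pointed at a pre-chosen directory, which is how the eval agent shares it with the conversation's workspace. A minimal usage sketch; the explicit path is a placeholder:

```python
from pathlib import Path

from gptme.eval.filestore import FileStore

# Default: a fresh temporary gptme-evals-* directory, as before.
tmp_store = FileStore()

# New in this commit: pin the store to an explicit directory, e.g. the
# conversation's <logdir>/workspace (placeholder path below).
store = FileStore(working_dir=Path("/tmp/example-logdir/workspace"))
store.upload({"main.py": "print('hello')\n"})
```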
gptme/eval/main.py (40 changes: 30 additions & 10 deletions)
@@ -65,23 +65,43 @@ class ProcessError:
ProcessResult = Union[ProcessSuccess, ProcessError]


class StreamTee(io.TextIOBase):
"""Capture stdout or stderr to a stream and optionally keep original streams intact."""

# NOTE: toggling keep_stream can be useful for debugging
def __init__(self, stream, keep_stream=False):
self.stream = stream
self.captured = io.StringIO()
self.keep_stream = keep_stream

def write(self, message) -> int:
self.captured.write(message)
if self.keep_stream:
self.stream.write(message)
return len(message)

def getvalue(self):
return self.captured.getvalue()


def act_process(agent, files, prompt, queue: "Queue[ProcessResult]"):
# Runs in a process for each eval
# each eval has a process group, so we can kill all child processes
os.setpgrp()

# redirect stdout and stderr to streams
stdout, stderr = io.StringIO(), io.StringIO()
stdout_orig, stderr_orig = sys.stdout, sys.stderr
sys.stdout, sys.stderr = stdout, stderr
stdout = StreamTee(sys.stdout)
stderr = StreamTee(sys.stderr)
sys.stdout, sys.stderr = stdout, stderr # type: ignore

def error_handler(e):
duration = time.time() - start
sys.stdout, sys.stderr = stdout_orig, stderr_orig
sys.stdout, sys.stderr = stdout.stream, stderr.stream
print(f"Error: {e}")
queue.put(ProcessError(str(e), stdout.getvalue(), stderr.getvalue(), duration))
# kill child processes
# os.killpg(0, signal.SIGKILL)

sys.exit(1)

# handle SIGTERM
@@ -93,9 +113,9 @@ def sigterm_handler(*_):
start = time.time()
files = agent.act(files, prompt)
duration = time.time() - start
sys.stdout, sys.stderr = stdout_orig, stderr_orig
sys.stdout, sys.stderr = stdout.stream, stderr.stream
queue.put(ProcessSuccess(files, stdout.getvalue(), stderr.getvalue(), duration))
print("Process finished")
print("Process finished successfully")
# It seems that adding this prevents the queue from syncing or something, maybe SIGKILL is too harsh...
# os.killpg(0, signal.SIGKILL)

@@ -105,7 +125,7 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:
"""
Executes the code for a specific model with a timeout.
"""
print(
logger.info(
f'Running "{test["name"]}" with prompt "{test["prompt"]}" for model: {agent.model}'
)

@@ -120,7 +140,7 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:

status: Status = "success"
if p.is_alive():
print("Timeout reached, terminating process")
logger.info("Timeout reached, terminating process")
p.terminate()
p.join(timeout=1)
status = "timeout"
@@ -141,7 +161,7 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:
}

logger.info("Got result")
if status == "success":
if status != "timeout":
time_gen = result.duration
stdout, stderr = result.stdout, result.stderr

@@ -169,7 +189,7 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:

ctx = ResultContext(files, stdout_run, stderr_run, exit_code)
results: list[CaseResult] = []
print(f"\n--- Results for {test['name']} ---")
print(f"\n--- Results for '{test['name']}' with {agent.model} ---")
for name, case in test["expect"].items():
code = inspect.getsource(case).strip()
eval_start = time.time()
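StreamTee replaces the bare StringIO redirection so the eval runner can capture a child's stdout/stderr for the result object while, when keep_stream is toggled on, still echoing to the real streams for debugging. The same pattern in isolation, as a small runnable sketch:

```python
import io
import sys


class StreamTee(io.TextIOBase):
    """Capture writes, optionally passing them through to the wrapped stream."""

    def __init__(self, stream, keep_stream: bool = False):
        self.stream = stream
        self.captured = io.StringIO()
        self.keep_stream = keep_stream

    def write(self, message) -> int:
        self.captured.write(message)
        if self.keep_stream:
            self.stream.write(message)
        return len(message)

    def getvalue(self) -> str:
        return self.captured.getvalue()


# Redirect stdout, run something noisy, then restore the original stream.
tee = StreamTee(sys.stdout, keep_stream=False)
sys.stdout = tee
print("captured but not echoed")
sys.stdout = tee.stream
print("captured:", repr(tee.getvalue()))
```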
