diff --git a/gptme/eval/evals.py b/gptme/eval/evals.py
index 687fa18a..2b57e33d 100644
--- a/gptme/eval/evals.py
+++ b/gptme/eval/evals.py
@@ -36,6 +36,22 @@ def check_output_hello_ask(ctx):
     return "Hello, Erik!" in ctx.stdout
 
 
+def check_package_json(ctx):  # "init-react" expectation: "package.json" is a member of ctx.files
+    return "package.json" in ctx.files
+
+
+def check_output_compiled_successfully(ctx):  # "init-react" expectation: ctx.stdout contains "Compiled successfully"
+    return "Compiled successfully" in ctx.stdout
+
+
+def check_output_erik(ctx):  # "whois-superuserlabs-ceo" expectation: case-sensitive substring "Erik" in ctx.stdout
+    return "Erik" in ctx.stdout
+
+
+def check_cargo_toml(ctx):  # "init-rust" expectation: "Cargo.toml" is a member of ctx.files
+    return "Cargo.toml" in ctx.files
+
+
 tests: list["ExecTest"] = [
     {
         "name": "hello",
@@ -88,6 +104,34 @@ def check_output_hello_ask(ctx):
             "we have a commit": check_commit_exists,
         },
     },
+    {
+        "name": "init-react",
+        "files": {},
+        "run": "npm run build",
+        "prompt": "create a react project in the current directory, try to build it, but dont start the server and dont use git",
+        "expect": {
+            "package.json exists": check_package_json,
+            "builds successfully": check_output_compiled_successfully,
+        },
+    },
+    {
+        "name": "init-rust",
+        "files": {},
+        "run": "cargo build",
+        "prompt": "create a Rust project in the current directory",
+        "expect": {
+            "Cargo.toml exists": check_cargo_toml,
+        },
+    },
+    {
+        "name": "whois-superuserlabs-ceo",
+        "files": {},
+        "run": "cat answer.txt",
+        "prompt": "who is the CEO of Superuser Labs? write the answer to answer.txt",
+        "expect": {
+            "correct output": check_output_erik,
+        },
+    },
     # Fails, gets stuck on interactive stuff
     # {
     #     "name": "init-vue-ts-tailwind",
diff --git a/gptme/eval/main.py b/gptme/eval/main.py
index 1b553b40..4d89c701 100644
--- a/gptme/eval/main.py
+++ b/gptme/eval/main.py
@@ -8,6 +8,7 @@
 import inspect
 import io
 import logging
+import os
 import signal
 import subprocess
 import sys
@@ -56,6 +57,8 @@ class ProcessError:
 
 def act_process(agent, files, prompt, queue: "Queue[ProcessResult]"):
     # Runs in a process for each eval
+    # put each eval in its own process group so all of its child processes can be killed together
+    os.setpgrp()
 
     # redirect stdout and stderr to streams
     stdout, stderr = io.StringIO(), io.StringIO()
@@ -67,6 +70,8 @@ def error_handler(e):
         sys.stdout, sys.stderr = stdout_orig, stderr_orig
         print(f"Error: {e}")
         queue.put(ProcessError(str(e), stdout.getvalue(), stderr.getvalue(), duration))
+        # kill the whole process group; pgid 0 means our own group, so this also signals this process
+        os.killpg(0, signal.SIGKILL)
         sys.exit(1)
 
     # handle SIGTERM
@@ -98,7 +103,7 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:
     Executes the code for a specific model with a timeout.
     """
     print(
-        f"Running test {test['name']} with prompt: {test['prompt']} for model: {agent.model}"
+        f'Running "{test["name"]}" with prompt "{test["prompt"]}" for model: {agent.model}'
     )
 
     queue: Queue[ProcessResult] = Queue()
@@ -114,7 +119,7 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:
     if p.is_alive():
         print("Timeout reached, terminating process")
         p.terminate()
-        p.join()
+        p.join(timeout=1)
         status = "timeout"
         time_gen = timeout
 
@@ -129,7 +134,9 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:
             "stderr": "",
         }
 
-    result = queue.get()
+    logger.info("Getting result from queue")
+    result = queue.get(timeout=1)
+    logger.info("Got result")
     if status == "success":
         time_gen = result.duration
     stdout, stderr = result.stdout, result.stderr
@@ -150,6 +157,7 @@ def execute(test: ExecTest, agent: Agent, timeout: int) -> ExecResult:
     run_start = time.time()
     env = SimpleExecutionEnv()
     env.upload(files)
+    logger.info(f"Running check: {test['run']}")
     stdout_run, stderr_run, exit_code = env.run(test["run"])
     time_run = time.time() - run_start