Skip to content

Commit

Permalink
Merge pull request #33 from synacktraa/main
Browse files Browse the repository at this point in the history
feat: use uv for running scripts in venvs
  • Loading branch information
synacktraa authored Feb 16, 2025
2 parents 32f1676 + 63a1574 commit 346226c
Show file tree
Hide file tree
Showing 7 changed files with 875 additions and 182 deletions.
12 changes: 6 additions & 6 deletions benchmarking/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ COHERE_API_KEY=

### Fetch evals from Supabase
```
python fetch_evals.py
uv run fetch_evals.py
```

### Generating results
Expand All @@ -22,27 +22,27 @@ COHERE_API_KEY=

- Anthropic Grounding Locatr
```
python grounding_locatr.py anthropic
uv run grounding_locatr.py anthropic
```

- OS Atlas Grounding Locatr
```
python grounding_locatr.py os_atlas
uv run grounding_locatr.py os_atlas
```

### Comparing results with evals

- Original Locatr
```
python compare.py original
uv run compare.py original
```

- Anthropic Grounding Locatr
```
python compare.py anthropic
uv run compare.py anthropic
```

- OS Atlas Grounding Locatr
```
python compare.py os_atlas
uv run compare.py os_atlas
```
3 changes: 2 additions & 1 deletion benchmarking/compare.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys
from pathlib import Path

import yaml
import sys
from playwright.sync_api import sync_playwright

if len(sys.argv) == 2:
Expand Down
5 changes: 3 additions & 2 deletions benchmarking/fetch_evals.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import yaml
import re
from urllib.parse import urlparse, urlunparse
from supabase import create_client

import yaml
from postgrest.exceptions import APIError
from supabase import create_client

# Initialize Supabase client
supabase= create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
Expand Down
16 changes: 7 additions & 9 deletions benchmarking/grounding_locatr.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
from playwright.sync_api import sync_playwright
from pathlib import Path
import logging
import re
import sys
import yaml
from base64 import standard_b64encode
from dataclasses import dataclass, asdict
from typing import Literal
from pathlib import Path
from base64 import standard_b64encode
from textwrap import dedent
from json_repair import repair_json
import logging
from typing import Literal

import yaml
from anthropic import Anthropic

from dotenv import load_dotenv

from gradio_client import Client, handle_file
from json_repair import repair_json
from PIL import Image, ImageDraw
from playwright.sync_api import sync_playwright


load_dotenv()

Expand Down
16 changes: 16 additions & 0 deletions benchmarking/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[project]
name = "benchmarking"
version = "0.1.0"
description = "Benchmarking scripts for generating Locatr results and comparing them with stored evals"
readme = "README.md"
requires-python = ">=3.12.0"
dependencies = [
"anthropic>=0.45.2",
"gradio-client>=1.7.0",
"json-repair>=0.37.0",
"pillow>=11.1.0",
"playwright>=1.50.0",
"python-dotenv>=1.0.1",
"pyyaml>=6.0.2",
"supabase>=2.13.0",
]
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,7 @@ exclude_lines = [
"if __name__ == .__main__.:",
"pass",
"raise ImportError",
]
]

[tool.uv.workspace]
members = ["benchmarking"]
1,000 changes: 837 additions & 163 deletions uv.lock

Large diffs are not rendered by default.

0 comments on commit 346226c

Please sign in to comment.