diff --git a/Makefile b/Makefile index 23a3ff5d..11743302 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ build-docker-dev: docker build . -t gptme-dev:latest -f scripts/Dockerfile.dev build-docker-full: - docker build . -t gptme-eval:latest -f scripts/Dockerfile.eval --build-arg RUST=yes --build-arg BROWSER=yes + docker build . -t gptme-eval:latest -f scripts/Dockerfile.eval --build-arg RUST=yes --build-arg PLAYWRIGHT=no test: @# if SLOW is not set, pass `-m "not slow"` to skip slow tests diff --git a/gptme/tools/_browser_lynx.py b/gptme/tools/_browser_lynx.py index ad470ae3..f91f8b7b 100644 --- a/gptme/tools/_browser_lynx.py +++ b/gptme/tools/_browser_lynx.py @@ -2,28 +2,51 @@ Browser tool by calling lynx --dump """ +import os import subprocess -def read_url(url): - return subprocess.run( - ["lynx", "--dump", url, "--display_charset=utf-8"], stdout=subprocess.PIPE - ).stdout.decode("utf-8") - - -def search(query, engine="google"): +def read_url(url, cookies: dict | None = None) -> str: + env = os.environ.copy() + # TODO: create and set LYNX_CFG to use custom lynx config file (needed to save cookies, which I need to debug how cookies should be read) + # env["LYNX_CFG"] = str(Path("~/.config/lynx/lynx.cfg").expanduser()) + if cookies: + # save them to file to be read by lynx + pass + # with open(Path("~/.lynx_cookies").expanduser(), "w") as f: + # for k, v in cookies.items(): + # f.write(f"{k}\t{v}\n") + p = subprocess.run( + ["lynx", "--dump", url, "--display_charset=utf-8"], + env=env, + check=True, + capture_output=True, + ) + # should be utf-8, but we can't be sure + return p.stdout.decode("utf-8", errors="replace") + + +def search(query, engine="duckduckgo"): if engine == "google": - return read_url(f"https://www.google.com/search?q={query}") + # TODO: we need to figure out a way to remove the consent banner to access google search results + # otherwise google is not usable + return read_url( + f"https://www.google.com/search?q={query}&hl=en", + cookies={"CONSENT+": "YES+42"}, + ) elif engine == "duckduckgo": - return read_url(f"https://duckduckgo.com/?q={query}") + return read_url(f"https://lite.duckduckgo.com/lite/?q={query}") raise ValueError(f"Unknown search engine: {engine}") def test_read_url(): - print(read_url("https://gptme.org/")) - print(read_url("https://github.com/ErikBjare/gptme/issues/205")) + content = read_url("https://gptme.org/") + assert "Getting Started" in content + content = read_url("https://github.com/ErikBjare/gptme/issues/205") + assert "lynx-backed browser tool" in content def test_search(): - print(search("Python", "google")) - print(search("Python", "duckduckgo")) + # result = search("Python", "google") + result = search("Erik Bjäreholt", "duckduckgo") + assert "erik.bjareholt.com" in result diff --git a/scripts/Dockerfile.eval b/scripts/Dockerfile.eval index 1977cf5f..546e88e8 100644 --- a/scripts/Dockerfile.eval +++ b/scripts/Dockerfile.eval @@ -21,13 +21,15 @@ RUN if [ "$RUST" = "yes" ]; then \ apt-get update && apt-get install build-essential -y; \ fi -# Install playwright if browser is enabled -ARG BROWSER=no +# Install playwright if enabled, else just install lynx +ARG PLAYWRIGHT=no USER root -RUN if [ "$BROWSER" = "yes" ]; then \ +RUN if [ "$PLAYWRIGHT" = "yes" ]; then \ poetry install -E browser --without=dev; \ poetry run playwright install-deps; \ su appuser -c "poetry run playwright install chromium"; \ + else \ + apt-get update && apt-get install lynx -y; \ fi # Create eval_results directory @@ -41,4 +43,4 @@ RUN git config --global user.email "gptme@superuserlabs.org" RUN git config --global init.defaultBranch main # Add an entry point for running evals -ENTRYPOINT ["poetry", "run", "python", "-m", "gptme.eval"] +ENTRYPOINT ["python", "-m", "gptme.eval"]