From 11cd87be262e7591d24c302fa13d4972ae63b719 Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Fri, 1 Nov 2024 13:33:24 +0700
Subject: [PATCH] Feat e2e test cortexso hub (#1590)

* feat: e2e testing cortexso model hub

* chore: schedule to run models test weekly

* chore: resolve warning pytest

* chore: use default branch cortexso hub

---------

Co-authored-by: Hien To
---
 .github/workflows/test-cortexso-model-hub.yml | 109 ++++++++++++
 engine/e2e-test/pytest.ini                    |   2 +
 engine/e2e-test/requirements.txt              |   1 +
 .../test_api_cortexso_hub_llamacpp_engine.py  | 166 ++++++++++++++++++
 4 files changed, 278 insertions(+)
 create mode 100644 .github/workflows/test-cortexso-model-hub.yml
 create mode 100644 engine/e2e-test/pytest.ini
 create mode 100644 engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py

diff --git a/.github/workflows/test-cortexso-model-hub.yml b/.github/workflows/test-cortexso-model-hub.yml
new file mode 100644
index 000000000..320369235
--- /dev/null
+++ b/.github/workflows/test-cortexso-model-hub.yml
@@ -0,0 +1,109 @@
+name: Test cortexso Model Hub
+
+on:
+  schedule:
+    - cron: "0 16 * * 5" # every Friday at 23:00 UTC+7
+  workflow_dispatch:
+
+jobs:
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    timeout-minutes: 1440
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: "linux"
+            name: "amd64"
+            runs-on: "ubuntu-20-04-e2e-cortexcpp-model-hub"
+            cmake-flags: "-DCORTEX_CPP_VERSION=${{github.head_ref}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
+            build-deps-cmake-flags: ""
+            ccache-dir: ""
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: use python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install tools on Linux
+        run: |
+          sudo chown -R runner:runner /home/runner/cortexcpp
+          python3 -m pip install awscli
+
+      - name: Download vcpkg cache from s3
+        continue-on-error: true
+        run: |
+          aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0
+        env:
+          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
+          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
+
+      - name: Configure vcpkg
+        run: |
+          cd engine
+          make configure-vcpkg
+
+      - name: Build
+        run: |
+          cd engine
+          make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"
+
+      - name: Run unit tests
+        run: |
+          cd engine
+          make run-unit-tests
+
+      - name: Run setup config for linux
+        shell: bash
+        run: |
+          cd engine
+          ./build/cortex --version
+          sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc
+
+      - name: Run e2e tests
+        run: |
+          cd engine
+          cp build/cortex build/cortex-nightly
+          cp build/cortex build/cortex-beta
+          python -m pip install --upgrade pip
+          python -m pip install -r e2e-test/requirements.txt
+          pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py
+          rm build/cortex-nightly
+          rm build/cortex-beta
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_E2E }}
+
+      - name: Pre-package
+        run: |
+          cd engine
+          make pre-package DESTINATION_BINARY_NAME="cortex"
+
+      - name: Package
+        run: |
+          cd engine
+          make package
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: cortex-${{ matrix.os }}-${{ matrix.name }}
+          path: ./engine/cortex
+
+
+      - name: Upload linux vcpkg cache to s3
+        continue-on-error: true
+        if: always()
+        run: |
+          aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux --endpoint ${{ secrets.MINIO_ENDPOINT }}
+        env:
+          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
+          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
diff --git a/engine/e2e-test/pytest.ini b/engine/e2e-test/pytest.ini
new file mode 100644
index 000000000..0102b0a97
--- /dev/null
+++ b/engine/e2e-test/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+asyncio_default_fixture_loop_scope = function
diff --git a/engine/e2e-test/requirements.txt b/engine/e2e-test/requirements.txt
index f0eabb974..05b47e0b0 100644
--- a/engine/e2e-test/requirements.txt
+++ b/engine/e2e-test/requirements.txt
@@ -2,3 +2,4 @@ websockets
 pytest
 pytest-asyncio
 requests
+pyyaml
\ No newline at end of file
diff --git a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
new file mode 100644
index 000000000..e13c4827a
--- /dev/null
+++ b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
@@ -0,0 +1,166 @@
+import pytest
+import requests
+import os
+import yaml
+
+from pathlib import Path
+from test_runner import (
+    run,
+    start_server,
+    stop_server,
+    wait_for_websocket_download_success_event,
+)
+
+collection_id = "cortexso/local-models-6683a6e29e8f3018845b16db"
+token = os.getenv("HF_TOKEN")
+if not token:
+    raise ValueError("HF_TOKEN environment variable not set")
+
+def get_repos_in_collection(collection_id, token):
+    """Return the IDs of the repos listed in a Hugging Face collection ([] on HTTP error)."""
+    url = f"https://huggingface.co/api/collections/{collection_id}"
+    headers = {"Authorization": f"Bearer {token}"}
+    response = requests.get(url, headers=headers, timeout=60)
+
+    # An error body is not guaranteed to be JSON, so failures are reported via response.text
+    if response.status_code == 200:
+        return [repo["id"] for repo in response.json()["items"]]
+    else:
+        print("Error fetching repos:", response.status_code, response.text)
+        return []
+
+def get_repo_default_branch(repo_id, token):
+    """Return the 'default' value from the repo's metadata.yml on main, or None on HTTP error."""
+    url = f"https://huggingface.co/{repo_id}/resolve/main/metadata.yml"
+    headers = {"Authorization": f"Bearer {token}"}
+    response = requests.get(url, headers=headers, timeout=60)
+
+    # An error body is not guaranteed to be JSON, so failures are reported via response.text
+    if response.status_code == 200:
+        # An empty YAML document parses to None; fall back to {} so .get() stays safe
+        metadata = yaml.safe_load(response.text) or {}
+        return metadata.get("default")
+    else:
+        print(f"Error fetching metadata for {repo_id}:", response.status_code, response.text)
+        return None
+
+def get_all_repos_and_default_branches_from_metadata(collection_id, token):
+    """Return "<repo name>:<default branch>" for each collection repo whose default branch contains "gguf"."""
+    repos = get_repos_in_collection(collection_id, token)
+    combined_list = []
+
+    # Fetch each repo's default branch and keep only the ones that mention gguf
+    for repo_id in repos:
+        default_branch = get_repo_default_branch(repo_id, token)
+        if isinstance(default_branch, str) and "gguf" in default_branch:
+            combined_list.append(f"{repo_id.split('/')[-1]}:{default_branch}")
+
+    return combined_list
+
+# Resolved once at import time so the list can feed pytest.mark.parametrize below
+repo_branches = get_all_repos_and_default_branches_from_metadata(collection_id, token)
+
+class TestCortexsoModels:
+    """End-to-end checks run once per "<repo name>:<branch>" entry in repo_branches."""
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self, request):
+        """Start the server and delete every listed model before a test; delete again and stop after."""
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+        # Delete model if exists
+        for model_url in repo_branches:
+            run(
+                "Delete model",
+                [
+                    "models",
+                    "delete",
+                    model_url,
+                ],
+            )
+        yield
+
+        # Teardown
+        for model_url in repo_branches:
+            run(
+                "Delete model",
+                [
+                    "models",
+                    "delete",
+                    model_url,
+                ],
+            )
+        stop_server()
+
+    @pytest.mark.parametrize("model_url", repo_branches)
+    @pytest.mark.asyncio
+    async def test_models_on_cortexso_hub(self, model_url):
+        """Pull model_url, then install the engine, start the model, chat, stop it and uninstall."""
+        # Pull model from cortexso hub
+        json_body = {
+            "model": model_url
+        }
+        response = requests.post("http://localhost:3928/models/pull", json=json_body)
+        assert response.status_code == 200, f"Failed to pull model: {model_url}"
+
+        await wait_for_websocket_download_success_event(timeout=None)
+
+        # Check if the model was pulled successfully
+        get_model_response = requests.get(
+            f"http://127.0.0.1:3928/models/{model_url}"
+        )
+        assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
+        assert (
+            get_model_response.json()["model"] == model_url
+        ), f"Unexpected model name for: {model_url}"
+
+        # Check if the model is available in the list of models
+        response = requests.get("http://localhost:3928/models")
+        assert response.status_code == 200
+        models = [i["id"] for i in response.json()["data"]]
+        assert model_url in models, f"Model not found in list: {model_url}"
+
+        # Install Engine (exit code is checked first so a failed install reports its own message)
+        exit_code, output, error = run(
+            "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture=False
+        )
+        root = Path.home()
+        assert exit_code == 0, f"Install engine failed with error: {error}"
+        assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt")
+
+        # Start the model
+        response = requests.post("http://localhost:3928/models/start", json=json_body)
+        assert response.status_code == 200, f"status_code: {response.status_code}"
+
+        # Send an inference request
+        inference_json_body = {
+            "frequency_penalty": 0.2,
+            "max_tokens": 4096,
+            "messages": [
+                {
+                    "content": "",
+                    "role": "user"
+                }
+            ],
+            "model": model_url,
+            "presence_penalty": 0.6,
+            "stop": [
+                "End"
+            ],
+            "stream": False,
+            "temperature": 0.8,
+            "top_p": 0.95
+        }
+        response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"})
+        assert response.status_code == 200, f"status_code: {response.status_code} response: {response.text}"
+
+        # Stop the model
+        response = requests.post("http://localhost:3928/models/stop", json=json_body)
+        assert response.status_code == 200, f"status_code: {response.status_code}"
+
+        # Uninstall Engine
+        exit_code, output, error = run(
+            "Uninstall engine", ["engines", "uninstall", "llama-cpp"]
+        )
+        assert "Engine llama-cpp uninstalled successfully!" in output
+        assert exit_code == 0, f"Uninstall engine failed with error: {error}"