From 11cd87be262e7591d24c302fa13d4972ae63b719 Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Fri, 1 Nov 2024 13:33:24 +0700
Subject: [PATCH] Feat e2e test cortexso hub (#1590)

* feat: e2e testing cortexso model hub

* chore: schedule to run models test weekly

* chore: resolve warning pytest

* chore: use default branch cortexso hub

---------

Co-authored-by: Hien To <tominhhien97@gmail.com>
---
 .github/workflows/test-cortexso-model-hub.yml | 109 ++++++++++++
 engine/e2e-test/pytest.ini                    |   2 +
 engine/e2e-test/requirements.txt              |   1 +
 .../test_api_cortexso_hub_llamacpp_engine.py  | 166 ++++++++++++++++++
 4 files changed, 278 insertions(+)
 create mode 100644 .github/workflows/test-cortexso-model-hub.yml
 create mode 100644 engine/e2e-test/pytest.ini
 create mode 100644 engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py

diff --git a/.github/workflows/test-cortexso-model-hub.yml b/.github/workflows/test-cortexso-model-hub.yml
new file mode 100644
index 000000000..320369235
--- /dev/null
+++ b/.github/workflows/test-cortexso-model-hub.yml
@@ -0,0 +1,109 @@
+name: Test cortexso Model Hub
+
+on:
+  schedule:
+    - cron: "0 16 * * 5" # every Friday at 23:00 UTC+7
+  workflow_dispatch:
+
+jobs:
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    timeout-minutes: 1440
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: "linux"
+            name: "amd64"
+            runs-on: "ubuntu-20-04-e2e-cortexcpp-model-hub"
+            cmake-flags: "-DCORTEX_CPP_VERSION=${{github.head_ref}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
+            build-deps-cmake-flags: ""
+            ccache-dir: ""
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: use python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install tools on Linux
+        run: |
+          sudo chown -R runner:runner /home/runner/cortexcpp
+          python3 -m pip install awscli
+
+      - name: Download vcpkg cache from s3
+        continue-on-error: true
+        run: |
+          aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg  --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0
+        env:
+          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
+          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
+
+      - name: Configure vcpkg
+        run: |
+          cd engine
+          make configure-vcpkg
+
+      - name: Build
+        run: |
+          cd engine
+          make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"
+
+      - name: Run unit tests
+        run: |
+          cd engine
+          make run-unit-tests
+
+      - name: Run setup config for linux
+        shell: bash
+        run: |
+          cd engine
+          ./build/cortex --version
+          sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc
+
+      - name: Run e2e tests
+        run: |
+          cd engine
+          cp build/cortex build/cortex-nightly
+          cp build/cortex build/cortex-beta
+          python -m pip install --upgrade pip
+          python -m pip install -r e2e-test/requirements.txt
+          pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py
+          rm build/cortex-nightly
+          rm build/cortex-beta
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_E2E }}
+
+      - name: Pre-package
+        run: |
+          cd engine
+          make pre-package DESTINATION_BINARY_NAME="cortex"
+
+      - name: Package
+        run: |
+          cd engine
+          make package
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: cortex-${{ matrix.os }}-${{ matrix.name }}
+          path: ./engine/cortex
+
+
+      - name: Upload linux vcpkg cache to s3
+        continue-on-error: true
+        if: always()
+        run: |
+          aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux  --endpoint ${{ secrets.MINIO_ENDPOINT }}
+        env:
+          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
+          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
diff --git a/engine/e2e-test/pytest.ini b/engine/e2e-test/pytest.ini
new file mode 100644
index 000000000..0102b0a97
--- /dev/null
+++ b/engine/e2e-test/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+asyncio_default_fixture_loop_scope = function
diff --git a/engine/e2e-test/requirements.txt b/engine/e2e-test/requirements.txt
index f0eabb974..05b47e0b0 100644
--- a/engine/e2e-test/requirements.txt
+++ b/engine/e2e-test/requirements.txt
@@ -2,3 +2,4 @@ websockets
 pytest
 pytest-asyncio
 requests
+pyyaml
\ No newline at end of file
diff --git a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
new file mode 100644
index 000000000..e13c4827a
--- /dev/null
+++ b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
@@ -0,0 +1,166 @@
+import pytest
+import requests
+import os
+import yaml
+
+from pathlib import Path
+from test_runner import (
+    run,
+    start_server,
+    stop_server,
+    wait_for_websocket_download_success_event,
+)
+
+collection_id = "cortexso/local-models-6683a6e29e8f3018845b16db"
+token = os.getenv("HF_TOKEN")
+if not token:
+    raise ValueError("HF_TOKEN environment variable not set")
+
+def get_repos_in_collection(collection_id, token):
+    """Return the repo IDs listed in a Hugging Face collection, or [] on a non-200 response."""
+    url = f"https://huggingface.co/api/collections/{collection_id}"
+    headers = {"Authorization": f"Bearer {token}"}
+    response = requests.get(url, headers=headers, timeout=60)
+
+    # Error bodies are not guaranteed to be JSON, so the failure branch prints the raw text
+    if response.status_code == 200:
+        return [repo['id'] for repo in response.json()["items"]]
+    else:
+        print("Error fetching repos:", response.status_code, response.text)
+        return []
+
+def get_repo_default_branch(repo_id, token):
+    """Return the 'default' field of the repo's metadata.yml on main, or None if unavailable."""
+    url = f"https://huggingface.co/{repo_id}/resolve/main/metadata.yml"
+    headers = {"Authorization": f"Bearer {token}"}
+    response = requests.get(url, headers=headers, timeout=60)
+
+    # Check response and retrieve the 'default' field value
+    if response.status_code == 200:
+        # An empty or non-mapping YAML document has no 'default' field to read
+        metadata = yaml.safe_load(response.text)
+        return metadata.get("default") if isinstance(metadata, dict) else None
+    else:
+        print(f"Error fetching metadata for {repo_id}:", response.status_code, response.text)
+        return None
+
+def get_all_repos_and_default_branches_from_metadata(collection_id, token):
+    """Build "<model>:<branch>" entries for collection repos whose default branch contains "gguf"."""
+    model_refs = []
+
+    # Look up each repo's default branch and keep only the gguf ones
+    for repo_id in get_repos_in_collection(collection_id, token):
+        branch = get_repo_default_branch(repo_id, token)
+        if not branch or "gguf" not in branch:
+            continue
+        model_name = repo_id.split('/')[1]
+        model_refs.append(f"{model_name}:{branch}")
+    return model_refs
+
+# Resolve the "<model>:<branch>" entries at import time; they parametrize the tests below
+repo_branches = get_all_repos_and_default_branches_from_metadata(collection_id, token)
+
+class TestCortexsoModels:
+    """End-to-end checks run once per "<model>:<branch>" entry in repo_branches."""
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self, request):
+        """Start the server and delete the hub models; after the test, delete them again and stop it."""
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+        # Delete model if exists
+        for model_url in repo_branches:
+            run(
+                "Delete model",
+                [
+                    "models",
+                    "delete",
+                    model_url,
+                ],
+            )
+        yield
+
+        # Teardown
+        for model_url in repo_branches:
+            run(
+                "Delete model",
+                [
+                    "models",
+                    "delete",
+                    model_url,
+                ],
+            )
+        stop_server()
+
+    @pytest.mark.parametrize("model_url", repo_branches)
+    @pytest.mark.asyncio
+    async def test_models_on_cortexso_hub(self, model_url):
+        """Pull model_url, check it is listed, then install the engine and run one chat completion."""
+        # Pull model from cortexso hub
+        json_body = {
+            "model": model_url
+        }
+        response = requests.post("http://localhost:3928/models/pull", json=json_body)
+        assert response.status_code == 200, f"Failed to pull model: {model_url}"
+
+        await wait_for_websocket_download_success_event(timeout=None)
+
+        # Check if the model was pulled successfully
+        get_model_response = requests.get(
+            f"http://127.0.0.1:3928/models/{model_url}"
+        )
+        assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
+        assert (
+            get_model_response.json()["model"] == model_url
+        ), f"Unexpected model name for: {model_url}"
+
+        # Check if the model is available in the list of models
+        response = requests.get("http://localhost:3928/models")
+        assert response.status_code == 200
+        models = [i["id"] for i in response.json()["data"]]
+        assert model_url in models, f"Model not found in list: {model_url}"
+
+        # Install Engine (exit code is asserted first so the reported error reaches the log)
+        exit_code, output, error = run(
+            "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture=False
+        )
+        root = Path.home()
+        assert exit_code == 0, f"Install engine failed with error: {error}"
+        assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt")
+
+        # Start the model
+        response = requests.post("http://localhost:3928/models/start", json=json_body)
+        assert response.status_code == 200, f"status_code: {response.status_code}"
+
+        # Send an inference request (failure message uses the raw body, which may not be JSON)
+        inference_json_body = {
+            "frequency_penalty": 0.2,
+            "max_tokens": 4096,
+            "messages": [
+                {
+                    "content": "",
+                    "role": "user"
+                }
+            ],
+            "model": model_url,
+            "presence_penalty": 0.6,
+            "stop": [
+                "End"
+            ],
+            "stream": False,
+            "temperature": 0.8,
+            "top_p": 0.95
+        }
+        response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"})
+        assert response.status_code == 200, f"status_code: {response.status_code} response: {response.text}"
+
+        # Stop the model
+        response = requests.post("http://localhost:3928/models/stop", json=json_body)
+        assert response.status_code == 200, f"status_code: {response.status_code}"
+
+        # Uninstall Engine
+        exit_code, output, error = run(
+            "Uninstall engine", ["engines", "uninstall", "llama-cpp"]
+        )
+        assert exit_code == 0, f"Uninstall engine failed with error: {error}"
+        assert "Engine llama-cpp uninstalled successfully!" in output