From 3cec0e0fdf94192067b76ecc34c90adf621929fb Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 8 Nov 2024 16:08:19 +0700 Subject: [PATCH] feat: automation test docker (#1653) Co-authored-by: Hien To --- .github/workflows/cortex-cpp-quality-gate.yml | 38 ++++++++++ docker/entrypoint.sh | 6 +- .../capabilities/models/sources/index.mdx | 2 +- .../test_api_cortexso_hub_llamacpp_engine.py | 10 +-- engine/e2e-test/test_api_docker.py | 75 +++++++++++++++++++ engine/e2e-test/test_api_model_delete.py | 2 +- engine/e2e-test/test_api_model_get.py | 2 +- engine/e2e-test/test_api_model_import.py | 12 +-- engine/e2e-test/test_api_model_list.py | 2 +- .../test_api_model_pull_direct_url.py | 8 +- engine/e2e-test/test_api_model_start.py | 2 +- engine/e2e-test/test_api_model_stop.py | 4 +- engine/e2e-test/test_api_model_update.py | 2 +- 13 files changed, 139 insertions(+), 26 deletions(-) create mode 100644 engine/e2e-test/test_api_docker.py diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index 1194a453b..79c7bad81 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -189,3 +189,41 @@ jobs: AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + + build-docker-and-test: + runs-on: ubuntu-latest + steps: + - name: Getting the repo + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Run Docker + run: | + docker build -t menloltd/cortex:test -f docker/Dockerfile . 
+ docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test + + - name: use python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Run e2e tests + run: | + cd engine + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + pytest e2e-test/test_api_docker.py + + - name: Cleanup Docker + continue-on-error: true + if: always() + run: | + docker stop cortex + docker rm cortex diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 79f29bb93..6461eb15b 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -2,13 +2,13 @@ # Install cortex.llamacpp engine +echo "apiServerHost: 0.0.0.0" > /root/.cortexrc +echo "enableCors: true" >> /root/.cortexrc + cortex engines install llama-cpp -s /opt/cortex.llamacpp -cortex -v # Start the cortex server -sed -i 's/apiServerHost: 127.0.0.1/apiServerHost: 0.0.0.0/' /root/.cortexrc - cortex start # Keep the container running by tailing the log files diff --git a/docs/docs/capabilities/models/sources/index.mdx b/docs/docs/capabilities/models/sources/index.mdx index 186f192dc..2466b7a0e 100644 --- a/docs/docs/capabilities/models/sources/index.mdx +++ b/docs/docs/capabilities/models/sources/index.mdx @@ -272,7 +272,7 @@ curl --request POST \ Clients can abort a downloading task using the task ID. 
Below is a sample `curl` command to abort a download task: ```sh -curl --location --request DELETE 'http://127.0.0.1:3928/models/pull' \ +curl --location --request DELETE 'http://127.0.0.1:39281/v1/models/pull' \ --header 'Content-Type: application/json' \ --data '{ "taskId": "tinyllama:1b-gguf-q2-k" diff --git a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py index e13c4827a..9aecd3654 100644 --- a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py +++ b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py @@ -100,14 +100,14 @@ async def test_models_on_cortexso_hub(self, model_url): json_body = { "model": model_url } - response = requests.post("http://localhost:3928/models/pull", json=json_body) + response = requests.post("http://localhost:3928/v1/models/pull", json=json_body) assert response.status_code == 200, f"Failed to pull model: {model_url}" await wait_for_websocket_download_success_event(timeout=None) # Check if the model was pulled successfully get_model_response = requests.get( - f"http://127.0.0.1:3928/models/{model_url}" + f"http://127.0.0.1:3928/v1/models/{model_url}" ) assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}" assert ( @@ -115,7 +115,7 @@ async def test_models_on_cortexso_hub(self, model_url): ), f"Unexpected model name for: {model_url}" # Check if the model is available in the list of models - response = requests.get("http://localhost:3928/models") + response = requests.get("http://localhost:3928/v1/models") assert response.status_code == 200 models = [i["id"] for i in response.json()["data"]] assert model_url in models, f"Model not found in list: {model_url}" @@ -129,7 +129,7 @@ async def test_models_on_cortexso_hub(self, model_url): assert exit_code == 0, f"Install engine failed with error: {error}" # Start the model - response = requests.post("http://localhost:3928/models/start", json=json_body) + response = 
requests.post("http://localhost:3928/v1/models/start", json=json_body) assert response.status_code == 200, f"status_code: {response.status_code}" # Send an inference request @@ -155,7 +155,7 @@ async def test_models_on_cortexso_hub(self, model_url): assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}" # Stop the model - response = requests.post("http://localhost:3928/models/stop", json=json_body) + response = requests.post("http://localhost:3928/v1/models/stop", json=json_body) assert response.status_code == 200, f"status_code: {response.status_code}" # Uninstall Engine diff --git a/engine/e2e-test/test_api_docker.py b/engine/e2e-test/test_api_docker.py new file mode 100644 index 000000000..432224f80 --- /dev/null +++ b/engine/e2e-test/test_api_docker.py @@ -0,0 +1,75 @@ +import pytest +import requests +import os + +from pathlib import Path +from test_runner import ( + wait_for_websocket_download_success_event +) + +repo_branches = ["tinyllama:1b-gguf"] + +class TestCortexsoModels: + + @pytest.fixture(autouse=True) + def setup_and_teardown(self, request): + yield + + @pytest.mark.parametrize("model_url", repo_branches) + @pytest.mark.asyncio + async def test_models_on_cortexso_hub(self, model_url): + + # Pull model from cortexso hub + json_body = { + "model": model_url + } + response = requests.post("http://localhost:3928/v1/models/pull", json=json_body) + assert response.status_code == 200, f"Failed to pull model: {model_url}" + + await wait_for_websocket_download_success_event(timeout=None) + + # Check if the model was pulled successfully + get_model_response = requests.get( + f"http://127.0.0.1:3928/v1/models/{model_url}" + ) + assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}" + assert ( + get_model_response.json()["model"] == model_url + ), f"Unexpected model name for: {model_url}" + + # Check if the model is available in the list of models + response = 
requests.get("http://localhost:3928/v1/models") + assert response.status_code == 200 + models = [i["id"] for i in response.json()["data"]] + assert model_url in models, f"Model not found in list: {model_url}" + + # Start the model + response = requests.post("http://localhost:3928/v1/models/start", json=json_body) + assert response.status_code == 200, f"status_code: {response.status_code}" + + # Send an inference request + inference_json_body = { + "frequency_penalty": 0.2, + "max_tokens": 4096, + "messages": [ + { + "content": "", + "role": "user" + } + ], + "model": model_url, + "presence_penalty": 0.6, + "stop": [ + "End" + ], + "stream": False, + "temperature": 0.8, + "top_p": 0.95 + } + response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"}) + assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}" + + # Stop the model + response = requests.post("http://localhost:3928/v1/models/stop", json=json_body) + assert response.status_code == 200, f"status_code: {response.status_code}" + diff --git a/engine/e2e-test/test_api_model_delete.py b/engine/e2e-test/test_api_model_delete.py index 7415a3d5a..455032a9b 100644 --- a/engine/e2e-test/test_api_model_delete.py +++ b/engine/e2e-test/test_api_model_delete.py @@ -18,5 +18,5 @@ def setup_and_teardown(self): stop_server() def test_models_delete_should_be_successful(self): - response = requests.delete("http://localhost:3928/models/tinyllama:gguf") + response = requests.delete("http://localhost:3928/v1/models/tinyllama:gguf") assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_model_get.py b/engine/e2e-test/test_api_model_get.py index 8d5360f67..dd58ca2a4 100644 --- a/engine/e2e-test/test_api_model_get.py +++ b/engine/e2e-test/test_api_model_get.py @@ -18,5 +18,5 @@ def setup_and_teardown(self): stop_server() def test_models_get_should_be_successful(self): - response = 
requests.get("http://localhost:3928/models/tinyllama:gguf") + response = requests.get("http://localhost:3928/v1/models/tinyllama:gguf") assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_model_import.py b/engine/e2e-test/test_api_model_import.py index 74481594c..7efbd52da 100644 --- a/engine/e2e-test/test_api_model_import.py +++ b/engine/e2e-test/test_api_model_import.py @@ -18,7 +18,7 @@ def setup_and_teardown(self): def test_model_import_should_be_success(self): body_json = {'model': 'tinyllama:gguf', 'modelPath': '/path/to/local/gguf'} - response = requests.post("http://localhost:3928/models/import", json=body_json) + response = requests.post("http://localhost:3928/v1/models/import", json=body_json) assert response.status_code == 200 @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.") @@ -26,7 +26,7 @@ def test_model_import_with_name_should_be_success(self): body_json = {'model': 'tinyllama:gguf', 'modelPath': '/path/to/local/gguf', 'name': 'test_model'} - response = requests.post("http://localhost:3928/models/import", json=body_json) + response = requests.post("http://localhost:3928/v1/models/import", json=body_json) assert response.status_code == 200 @pytest.mark.skipif(True, reason="Expensive test. 
Only test when you have local gguf file.") @@ -35,10 +35,10 @@ def test_model_import_with_name_should_be_success(self): 'modelPath': '/path/to/local/gguf', 'name': 'test_model', 'option': 'copy'} - response = requests.post("http://localhost:3928/models/import", json=body_json) + response = requests.post("http://localhost:3928/v1/models/import", json=body_json) assert response.status_code == 200 # Test imported path - response = requests.get("http://localhost:3928/models/testing-model") + response = requests.get("http://localhost:3928/v1/models/testing-model") assert response.status_code == 200 # Since this is a dynamic test - require actual file path # it's not safe to assert with the gguf file name @@ -47,11 +47,11 @@ def test_model_import_with_name_should_be_success(self): def test_model_import_with_invalid_path_should_fail(self): body_json = {'model': 'tinyllama:gguf', 'modelPath': '/invalid/path/to/gguf'} - response = requests.post("http://localhost:3928/models/import", json=body_json) + response = requests.post("http://localhost:3928/v1/models/import", json=body_json) assert response.status_code == 400 def test_model_import_with_missing_model_should_fail(self): body_json = {'modelPath': '/path/to/local/gguf'} - response = requests.post("http://localhost:3928/models/import", json=body_json) + response = requests.post("http://localhost:3928/v1/models/import", json=body_json) print(response) assert response.status_code == 409 \ No newline at end of file diff --git a/engine/e2e-test/test_api_model_list.py b/engine/e2e-test/test_api_model_list.py index dc3889906..5e2a4b901 100644 --- a/engine/e2e-test/test_api_model_list.py +++ b/engine/e2e-test/test_api_model_list.py @@ -18,5 +18,5 @@ def setup_and_teardown(self): stop_server() def test_models_list_should_be_successful(self): - response = requests.get("http://localhost:3928/models") + response = requests.get("http://localhost:3928/v1/models") assert response.status_code == 200 diff --git 
a/engine/e2e-test/test_api_model_pull_direct_url.py b/engine/e2e-test/test_api_model_pull_direct_url.py index aa15fbfba..ec72de147 100644 --- a/engine/e2e-test/test_api_model_pull_direct_url.py +++ b/engine/e2e-test/test_api_model_pull_direct_url.py @@ -42,11 +42,11 @@ async def test_model_pull_with_direct_url_should_be_success(self): myobj = { "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf" } - response = requests.post("http://localhost:3928/models/pull", json=myobj) + response = requests.post("http://localhost:3928/v1/models/pull", json=myobj) assert response.status_code == 200 await wait_for_websocket_download_success_event(timeout=None) get_model_response = requests.get( - "http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" + "http://127.0.0.1:3928/v1/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" ) assert get_model_response.status_code == 200 assert ( @@ -60,11 +60,11 @@ async def test_model_pull_with_direct_url_should_have_desired_name(self): "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf", "name": "smol_llama_100m" } - response = requests.post("http://localhost:3928/models/pull", json=myobj) + response = requests.post("http://localhost:3928/v1/models/pull", json=myobj) assert response.status_code == 200 await wait_for_websocket_download_success_event(timeout=None) get_model_response = requests.get( - "http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" + "http://127.0.0.1:3928/v1/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" ) assert get_model_response.status_code == 200 print(get_model_response.json()["name"]) diff --git a/engine/e2e-test/test_api_model_start.py 
b/engine/e2e-test/test_api_model_start.py index 3f4172d68..fddb33518 100644 --- a/engine/e2e-test/test_api_model_start.py +++ b/engine/e2e-test/test_api_model_start.py @@ -26,5 +26,5 @@ def setup_and_teardown(self): def test_models_start_should_be_successful(self): json_body = {"model": "tinyllama:gguf"} - response = requests.post("http://localhost:3928/models/start", json=json_body) + response = requests.post("http://localhost:3928/v1/models/start", json=json_body) assert response.status_code == 200, f"status_code: {response.status_code}" diff --git a/engine/e2e-test/test_api_model_stop.py b/engine/e2e-test/test_api_model_stop.py index 218331b98..315f51ef8 100644 --- a/engine/e2e-test/test_api_model_stop.py +++ b/engine/e2e-test/test_api_model_stop.py @@ -21,7 +21,7 @@ def setup_and_teardown(self): def test_models_stop_should_be_successful(self): json_body = {"model": "tinyllama:gguf"} - response = requests.post("http://localhost:3928/models/start", json=json_body) + response = requests.post("http://localhost:3928/v1/models/start", json=json_body) assert response.status_code == 200, f"status_code: {response.status_code}" - response = requests.post("http://localhost:3928/models/stop", json=json_body) + response = requests.post("http://localhost:3928/v1/models/stop", json=json_body) assert response.status_code == 200, f"status_code: {response.status_code}" diff --git a/engine/e2e-test/test_api_model_update.py b/engine/e2e-test/test_api_model_update.py index cf35f44f9..f862c8907 100644 --- a/engine/e2e-test/test_api_model_update.py +++ b/engine/e2e-test/test_api_model_update.py @@ -19,5 +19,5 @@ def setup_and_teardown(self): def test_models_update_should_be_successful(self): body_json = {'model': 'tinyllama:gguf'} - response = requests.patch("http://localhost:3928/models/tinyllama:gguf", json = body_json) + response = requests.patch("http://localhost:3928/v1/models/tinyllama:gguf", json = body_json) assert response.status_code == 200