Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: automation test docker #1653

Merged
merged 1 commit into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .github/workflows/cortex-cpp-quality-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,41 @@ jobs:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

# Build the Docker image from this PR and run the dockerized e2e suite
# against a live container, then always tear the container down.
build-docker-and-test:
  runs-on: ubuntu-latest
  steps:
    - name: Getting the repo
      uses: actions/checkout@v3
      with:
        submodules: 'recursive'

    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Run Docker
      run: |
        docker build -t menloltd/cortex:test -f docker/Dockerfile .
        # Detached container; -it dropped (no TTY in CI, and -i/-t are
        # meaningless with -d). Host port 3928 maps to the cortex API port
        # 39281 inside the container — the e2e tests below hit localhost:3928.
        docker run -d -p 3928:39281 --name cortex menloltd/cortex:test

    - name: use python
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    - name: Run e2e tests
      run: |
        cd engine
        python -m pip install --upgrade pip
        python -m pip install -r e2e-test/requirements.txt
        pytest e2e-test/test_api_docker.py

    # Renamed from "Run Docker" (the name was duplicated, which makes CI
    # logs ambiguous). Runs even if earlier steps failed so the container
    # never leaks; continue-on-error keeps a failed cleanup from masking
    # the real test result.
    - name: Cleanup Docker
      continue-on-error: true
      if: always()
      run: |
        docker stop cortex
        docker rm cortex
6 changes: 3 additions & 3 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

# Install cortex.llamacpp engine

echo "apiServerHost: 0.0.0.0" > /root/.cortexrc
echo "enableCors: true" >> /root/.cortexrc

cortex engines install llama-cpp -s /opt/cortex.llamacpp
cortex -v

# Start the cortex server

sed -i 's/apiServerHost: 127.0.0.1/apiServerHost: 0.0.0.0/' /root/.cortexrc

cortex start

# Keep the container running by tailing the log files
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/capabilities/models/sources/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ curl --request POST \
Clients can abort a downloading task using the task ID. Below is a sample `curl` command to abort a download task:

```sh
curl --location --request DELETE 'http://127.0.0.1:3928/models/pull' \
curl --location --request DELETE 'http://127.0.0.1:39281/v1/models/pull' \
--header 'Content-Type: application/json' \
--data '{
"taskId": "tinyllama:1b-gguf-q2-k"
Expand Down
10 changes: 5 additions & 5 deletions engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,22 +100,22 @@ async def test_models_on_cortexso_hub(self, model_url):
json_body = {
"model": model_url
}
response = requests.post("http://localhost:3928/models/pull", json=json_body)
response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
assert response.status_code == 200, f"Failed to pull model: {model_url}"

await wait_for_websocket_download_success_event(timeout=None)

# Check if the model was pulled successfully
get_model_response = requests.get(
f"http://127.0.0.1:3928/models/{model_url}"
f"http://127.0.0.1:3928/v1/models/{model_url}"
)
assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
assert (
get_model_response.json()["model"] == model_url
), f"Unexpected model name for: {model_url}"

# Check if the model is available in the list of models
response = requests.get("http://localhost:3928/models")
response = requests.get("http://localhost:3928/v1/models")
assert response.status_code == 200
models = [i["id"] for i in response.json()["data"]]
assert model_url in models, f"Model not found in list: {model_url}"
Expand All @@ -129,7 +129,7 @@ async def test_models_on_cortexso_hub(self, model_url):
assert exit_code == 0, f"Install engine failed with error: {error}"

# Start the model
response = requests.post("http://localhost:3928/models/start", json=json_body)
response = requests.post("http://localhost:3928/v1/models/start", json=json_body)
assert response.status_code == 200, f"status_code: {response.status_code}"

# Send an inference request
Expand All @@ -155,7 +155,7 @@ async def test_models_on_cortexso_hub(self, model_url):
assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}"

# Stop the model
response = requests.post("http://localhost:3928/models/stop", json=json_body)
response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
assert response.status_code == 200, f"status_code: {response.status_code}"

# Uninstall Engine
Expand Down
75 changes: 75 additions & 0 deletions engine/e2e-test/test_api_docker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import pytest
import requests
import os

from pathlib import Path
from test_runner import (
wait_for_websocket_download_success_event
)

repo_branches = ["tinyllama:1b-gguf"]

class TestCortexsoModels:
    """End-to-end exercise of the dockerized cortex server.

    For each configured model tag: pull it from the cortexso hub, confirm it
    is retrievable and listed, start it, run one non-streaming chat
    completion, and stop it again.
    """

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, request):
        # No per-test setup or teardown: the server container's lifecycle is
        # handled by the CI workflow, outside of pytest.
        yield

    @pytest.mark.parametrize("model_url", repo_branches)
    @pytest.mark.asyncio
    async def test_models_on_cortexso_hub(self, model_url):
        model_body = {"model": model_url}

        # Kick off the model download from the cortexso hub.
        pull = requests.post("http://localhost:3928/v1/models/pull", json=model_body)
        assert pull.status_code == 200, f"Failed to pull model: {model_url}"

        # Block until the server's websocket reports the download finished.
        await wait_for_websocket_download_success_event(timeout=None)

        # The pulled model must be fetchable by id.
        detail = requests.get(f"http://127.0.0.1:3928/v1/models/{model_url}")
        assert detail.status_code == 200, f"Failed to fetch model: {model_url}"
        assert (
            detail.json()["model"] == model_url
        ), f"Unexpected model name for: {model_url}"

        # ...and must appear in the model listing.
        listing = requests.get("http://localhost:3928/v1/models")
        assert listing.status_code == 200
        listed_ids = [entry["id"] for entry in listing.json()["data"]]
        assert model_url in listed_ids, f"Model not found in list: {model_url}"

        # Load the model into the inference engine.
        start = requests.post("http://localhost:3928/v1/models/start", json=model_body)
        assert start.status_code == 200, f"status_code: {start.status_code}"

        # One non-streaming chat completion proves inference works end to end.
        completion_body = {
            "frequency_penalty": 0.2,
            "max_tokens": 4096,
            "messages": [{"content": "", "role": "user"}],
            "model": model_url,
            "presence_penalty": 0.6,
            "stop": ["End"],
            "stream": False,
            "temperature": 0.8,
            "top_p": 0.95,
        }
        infer = requests.post(
            "http://localhost:3928/v1/chat/completions",
            json=completion_body,
            headers={"Content-Type": "application/json"},
        )
        assert (
            infer.status_code == 200
        ), f"status_code: {infer.status_code} response: {infer.json()}"

        # Unload the model again.
        stop = requests.post("http://localhost:3928/v1/models/stop", json=model_body)
        assert stop.status_code == 200, f"status_code: {stop.status_code}"

2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ def setup_and_teardown(self):
stop_server()

def test_models_delete_should_be_successful(self):
response = requests.delete("http://localhost:3928/models/tinyllama:gguf")
response = requests.delete("http://localhost:3928/v1/models/tinyllama:gguf")
assert response.status_code == 200
2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ def setup_and_teardown(self):
stop_server()

def test_models_get_should_be_successful(self):
response = requests.get("http://localhost:3928/models/tinyllama:gguf")
response = requests.get("http://localhost:3928/v1/models/tinyllama:gguf")
assert response.status_code == 200
12 changes: 6 additions & 6 deletions engine/e2e-test/test_api_model_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def setup_and_teardown(self):
def test_model_import_should_be_success(self):
body_json = {'model': 'tinyllama:gguf',
'modelPath': '/path/to/local/gguf'}
response = requests.post("http://localhost:3928/models/import", json=body_json)
response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
assert response.status_code == 200

@pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
def test_model_import_with_name_should_be_success(self):
body_json = {'model': 'tinyllama:gguf',
'modelPath': '/path/to/local/gguf',
'name': 'test_model'}
response = requests.post("http://localhost:3928/models/import", json=body_json)
response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
assert response.status_code == 200

@pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
Expand All @@ -35,10 +35,10 @@ def test_model_import_with_name_should_be_success(self):
'modelPath': '/path/to/local/gguf',
'name': 'test_model',
'option': 'copy'}
response = requests.post("http://localhost:3928/models/import", json=body_json)
response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
assert response.status_code == 200
# Test imported path
response = requests.get("http://localhost:3928/models/testing-model")
response = requests.get("http://localhost:3928/v1/models/testing-model")
assert response.status_code == 200
# Since this is a dynamic test - require actual file path
# it's not safe to assert with the gguf file name
Expand All @@ -47,11 +47,11 @@ def test_model_import_with_name_should_be_success(self):
def test_model_import_with_invalid_path_should_fail(self):
body_json = {'model': 'tinyllama:gguf',
'modelPath': '/invalid/path/to/gguf'}
response = requests.post("http://localhost:3928/models/import", json=body_json)
response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
assert response.status_code == 400

def test_model_import_with_missing_model_should_fail(self):
body_json = {'modelPath': '/path/to/local/gguf'}
response = requests.post("http://localhost:3928/models/import", json=body_json)
response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
print(response)
assert response.status_code == 409
2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ def setup_and_teardown(self):
stop_server()

def test_models_list_should_be_successful(self):
response = requests.get("http://localhost:3928/models")
response = requests.get("http://localhost:3928/v1/models")
assert response.status_code == 200
8 changes: 4 additions & 4 deletions engine/e2e-test/test_api_model_pull_direct_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ async def test_model_pull_with_direct_url_should_be_success(self):
myobj = {
"model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf"
}
response = requests.post("http://localhost:3928/models/pull", json=myobj)
response = requests.post("http://localhost:3928/v1/models/pull", json=myobj)
assert response.status_code == 200
await wait_for_websocket_download_success_event(timeout=None)
get_model_response = requests.get(
"http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
"http://127.0.0.1:3928/v1/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
)
assert get_model_response.status_code == 200
assert (
Expand All @@ -60,11 +60,11 @@ async def test_model_pull_with_direct_url_should_have_desired_name(self):
"model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf",
"name": "smol_llama_100m"
}
response = requests.post("http://localhost:3928/models/pull", json=myobj)
response = requests.post("http://localhost:3928/v1/models/pull", json=myobj)
assert response.status_code == 200
await wait_for_websocket_download_success_event(timeout=None)
get_model_response = requests.get(
"http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
"http://127.0.0.1:3928/v1/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
)
assert get_model_response.status_code == 200
print(get_model_response.json()["name"])
Expand Down
2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ def setup_and_teardown(self):

def test_models_start_should_be_successful(self):
json_body = {"model": "tinyllama:gguf"}
response = requests.post("http://localhost:3928/models/start", json=json_body)
response = requests.post("http://localhost:3928/v1/models/start", json=json_body)
assert response.status_code == 200, f"status_code: {response.status_code}"
4 changes: 2 additions & 2 deletions engine/e2e-test/test_api_model_stop.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def setup_and_teardown(self):

def test_models_stop_should_be_successful(self):
json_body = {"model": "tinyllama:gguf"}
response = requests.post("http://localhost:3928/models/start", json=json_body)
response = requests.post("http://localhost:3928/v1/models/start", json=json_body)
assert response.status_code == 200, f"status_code: {response.status_code}"
response = requests.post("http://localhost:3928/models/stop", json=json_body)
response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
assert response.status_code == 200, f"status_code: {response.status_code}"
2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ def setup_and_teardown(self):

def test_models_update_should_be_successful(self):
body_json = {'model': 'tinyllama:gguf'}
response = requests.patch("http://localhost:3928/models/tinyllama:gguf", json = body_json)
response = requests.patch("http://localhost:3928/v1/models/tinyllama:gguf", json = body_json)
assert response.status_code == 200
Loading