feat: python runtime engine (#559)
vansangpfiev authored May 28, 2024
1 parent 5603112 commit cd63b89
Showing 15 changed files with 561 additions and 82 deletions.
99 changes: 99 additions & 0 deletions .github/scripts/e2e-test-python-linux-and-mac.sh
@@ -0,0 +1,99 @@
#!/bin/bash

## Example run command
# ./e2e-test-python-linux-and-mac.sh '../../examples/build/server' './e2e-test.py'

# Check for required arguments
if [[ $# -ne 2 ]]; then
  echo "Usage: $0 <path_to_binary> <path_to_python_file>"
  exit 1
fi

BINARY_PATH=$1
PYTHON_FILE_EXECUTION_PATH=$2

rm -f /tmp/python-file-execution-res.log /tmp/server.log

# Pick a random port to reduce the chance of a collision
min=10000
max=11000
range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Point the bundled Python runtime at the engine directory, then install numpy
export PYTHONHOME=$(pwd)/engines/cortex.python/python/
export LD_LIBRARY_PATH="$PYTHONHOME:$LD_LIBRARY_PATH"
export DYLD_FALLBACK_LIBRARY_PATH="$PYTHONHOME:$DYLD_FALLBACK_LIBRARY_PATH"
echo "Set Python HOME to $PYTHONHOME"
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
./engines/cortex.python/python/bin/python3 -m ensurepip
./engines/cortex.python/python/bin/python3 -m pip install --upgrade pip
./engines/cortex.python/python/bin/python3 -m pip install numpy --target=$PYTHONHOME/lib/python/site-packages/

# Start the binary file
"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/server.log &

pid=$!

if ! ps -p $pid >/dev/null; then
  echo "server failed to start. Logs:"
  cat /tmp/server.log
  exit 1
fi

# Wait for a few seconds to let the server start
sleep 3

# Run the curl commands
response1=$(curl --connect-timeout 60 -o /tmp/python-file-execution-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/fine_tuning/job" \
  --header 'Content-Type: application/json' \
  --data '{
    "file_execution_path": "'$PYTHON_FILE_EXECUTION_PATH'"
  }')

error_occurred=0

# Verify the response
if [[ "$response1" -ne 200 ]]; then
echo "The python file execution curl command failed with status code: $response1"
cat /tmp/python-file-execution-res.log
error_occurred=1
fi

# Verify the output of the Python file in output.txt
OUTPUT_FILE="./output.txt"
EXPECTED_OUTPUT="1 2 3" # Expected contents of output.txt written by the test file

if [[ -f "$OUTPUT_FILE" ]]; then
  actual_output=$(cat "$OUTPUT_FILE")
  if [[ "$actual_output" != "$EXPECTED_OUTPUT" ]]; then
    echo "The output of the Python file does not match the expected output."
    echo "Expected: $EXPECTED_OUTPUT"
    echo "Actual: $actual_output"
    error_occurred=1
  else
    echo "The output of the Python file matches the expected output."
  fi
else
  echo "Output file $OUTPUT_FILE does not exist."
  error_occurred=1
fi


if [[ "$error_occurred" -eq 1 ]]; then
echo "Server test run failed!!!!!!!!!!!!!!!!!!!!!!"
echo "Server Error Logs:"
cat /tmp/server.log
kill $pid
echo "An error occurred while running the server."
exit 1
fi

echo "----------------------"
echo "Log server:"
cat /tmp/server.log

echo "Server test run successfully!"

# Kill the server process
kill $pid
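
For reference, the request this script issues can be reproduced by hand against a running server. A minimal sketch; the port (10500) and the file path below are example values, not anything fixed by this commit:

# Manual reproduction of the e2e request; 10500 and the script path are
# placeholder values chosen for illustration.
curl --location "http://127.0.0.1:10500/v1/fine_tuning/job" \
  --header 'Content-Type: application/json' \
  --data '{"file_execution_path": ".github/scripts/python-file-to-test.py"}'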
119 changes: 119 additions & 0 deletions .github/scripts/e2e-test-python-windows.bat
@@ -0,0 +1,119 @@
@echo off

setlocal enabledelayedexpansion

set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"

rem Check for required arguments
if "%~2"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<path_to_python_file^>
exit /b 1
)

set "BINARY_PATH=%~1"
set "PYTHON_FILE_EXECUTION_PATH=%~2"

for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

del %TEMP%\response1.log 2>nul
del %TEMP%\response1_code.log 2>nul
del %TEMP%\server.log 2>nul

set /a min=9999
set /a max=11000
set /a range=max-min+1
set /a PORT=%min% + %RANDOM% %% %range%

rem Install numpy for Python
set "PYTHONHOME=%cd%\engines\cortex.python\python"
echo Set Python HOME to %PYTHONHOME%
%PYTHONHOME%\python.exe -m ensurepip
%PYTHONHOME%\python.exe -m pip install --upgrade pip
%PYTHONHOME%\python.exe -m pip install numpy --target=%PYTHONHOME%\Lib\site-packages\

rem Start the binary file
start "" /B "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > "%TEMP%\server.log" 2>&1

rem Wait a few seconds for the server to start (ping doubles as a sleep)
ping -n 3 127.0.0.1 > nul

rem Capture the PID of the started binary
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
  set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
  echo server failed to start. Logs:
  type %TEMP%\server.log
  echo.
  exit /b 1
)

rem Define JSON strings for curl data
call set "PYTHON_FILE_EXECUTION_PATH_STRING=%%PYTHON_FILE_EXECUTION_PATH:\=\\%%"
set "curl_data1={\"file_execution_path\":\"%PYTHON_FILE_EXECUTION_PATH_STRING%\"}"

rem Print the values of curl_data for debugging
echo curl_data1=%curl_data1%

rem Run the curl command; the response body goes to response1.log via -o, and the
rem status code from -w goes to a separate file so the two do not overwrite each other
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/fine_tuning/job" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1

set "error_occurred=0"

rem Read the status code from the status file
set "response1="
for /f %%a in (%TEMP%\response1_code.log) do set "response1=%%a"

if "%response1%" neq "200" (
echo The first curl command failed with status code: %response1%
type %TEMP%\response1.log
echo.
set "error_occurred=1"
)

echo ----------------------
echo Log python file execution:
type %TEMP%\response1.log
echo.

rem Verification step: Check the contents of output.txt
set "expected_output=1 2 3"
set "actual_output="
if exist "output.txt" (
  for /f "delims=" %%x in (output.txt) do set "actual_output=%%x"
  if "!actual_output!"=="!expected_output!" (
    echo Verification succeeded: output.txt contains the expected data.
  ) else (
    echo Verification failed: output.txt does not contain the expected data.
    echo Expected: !expected_output!
    echo Actual: !actual_output!
    set "error_occurred=1"
  )
) else (
  echo Verification failed: output.txt does not exist.
  set "error_occurred=1"
)

echo ----------------------
echo Server logs:
type %TEMP%\server.log
echo.

if "%error_occurred%"=="1" (
echo Server test run failed!!!!!!!!!!!!!!!!!!!!!!
taskkill /f /pid %pid%
echo An error occurred while running the server.
exit /b 1
)

echo Server test run successfully!

rem Kill the server process
taskkill /f /im %BINARY_NAME% 2>nul || exit /B 0

endlocal
9 changes: 9 additions & 0 deletions .github/scripts/python-file-to-test.py
@@ -0,0 +1,9 @@
import sys

# Print the module search path for debugging
for path in sys.path:
    print(path)

import numpy as np
print("Numpy version: " + np.__version__)

# Write "1 2 3" to output.txt; the e2e scripts verify this content
with open('output.txt', 'w') as file:
    file.write(' '.join(map(str, np.array([1, 2, 3]))))
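
For local debugging, the same file can be run directly under the bundled interpreter that the e2e scripts configure; a sketch assuming the cortex.python engine has been unpacked into ./engines as in the Linux/macOS script above:

# Run the test file with the bundled interpreter (paths as configured in
# e2e-test-python-linux-and-mac.sh); output.txt should then contain "1 2 3".
export PYTHONHOME=$(pwd)/engines/cortex.python/python/
export LD_LIBRARY_PATH="$PYTHONHOME:$LD_LIBRARY_PATH"
./engines/cortex.python/python/bin/python3 .github/scripts/python-file-to-test.py
cat output.txt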
26 changes: 26 additions & 0 deletions .github/workflows/cortex-cpp-quality-gate.yml
@@ -12,6 +12,7 @@ on:
env:
  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
  EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
  PYTHON_FILE_EXECUTION_PATH: "python-file-to-test.py"

jobs:
  build-and-test:
@@ -26,107 +27,126 @@ jobs:
  runs-on: "ubuntu-18-04"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
  run-e2e: true
  run-python-e2e: true

- os: "linux"
  name: "amd64-avx"
  runs-on: "ubuntu-18-04"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-avx512"
  runs-on: "ubuntu-18-04"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-vulkan"
  runs-on: "ubuntu-18-04-cuda-11-7"
  cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-cuda-11-7"
  runs-on: "ubuntu-18-04-cuda-11-7"
  cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-cuda-12-0"
  runs-on: "ubuntu-18-04-cuda-12-0"
  cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
  run-e2e: false
  run-python-e2e: false

- os: "mac"
  name: "amd64"
  runs-on: "macos-13"
  cmake-flags: ""
  run-e2e: true
  run-python-e2e: true

- os: "mac"
  name: "arm64"
  runs-on: "mac-silicon"
  cmake-flags: "-DMAC_ARM64=ON"
  run-e2e: true
  run-python-e2e: true

- os: "windows"
  name: "amd64-avx2"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: true
  run-python-e2e: true

- os: "windows"
  name: "amd64-avx"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx512"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-vulkan"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx2-cuda-12-0"
  runs-on: "windows-cuda-12-0"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx-cuda-12-0"
  runs-on: "windows-cuda-12-0"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx512-cuda-12-0"
  runs-on: "windows-cuda-12-0"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx2-cuda-11-7"
  runs-on: "windows-cuda-11-7"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx-cuda-11-7"
  runs-on: "windows-cuda-11-7"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx512-cuda-11-7"
  runs-on: "windows-cuda-11-7"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

steps:
  - name: Clone
@@ -161,6 +181,12 @@ jobs:
    cd cortex-cpp
    make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
- name: Run python e2e testing
  if: ${{ matrix.run-python-e2e }}
  run: |
    cd cortex-cpp
    make run-python-e2e-test RUN_TESTS=true PYTHON_FILE_EXECUTION_PATH=${{ env.PYTHON_FILE_EXECUTION_PATH }}
- name: Upload Artifact
  uses: actions/upload-artifact@v2
  with:
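
The new CI step above mirrors the existing llama.cpp e2e step. To reproduce it locally, the same make target can presumably be invoked from the repository root; a sketch assuming the cortex-cpp Makefile defines run-python-e2e-test as referenced in the workflow (the Makefile change itself is among the files not shown in this excerpt):

# Mirrors the "Run python e2e testing" CI step; assumes the run-python-e2e-test
# Makefile target exists as referenced above.
cd cortex-cpp
make run-python-e2e-test RUN_TESTS=true PYTHON_FILE_EXECUTION_PATH=python-file-to-test.py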
3 changes: 3 additions & 0 deletions cortex-cpp/CMakeLists.txt
@@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.5)
project(cortex-cpp C CXX)

include(engines/cortex.llamacpp/engine.cmake)
if(NOT LLAMA_CUDA AND (LLAMA_AVX2 OR APPLE))
  include(engines/cortex.python/engine.cmake)
endif()
include(CheckIncludeFileCXX)

check_include_file_cxx(any HAS_ANY)
