feat: python runtime engine (#559)
vansangpfiev authored May 28, 2024
1 parent 5603112 commit cd63b89
Showing 15 changed files with 561 additions and 82 deletions.
99 changes: 99 additions & 0 deletions .github/scripts/e2e-test-python-linux-and-mac.sh
@@ -0,0 +1,99 @@
#!/bin/bash

## Example run command
# ./e2e-test-python-linux-and-mac.sh '../../examples/build/server' './e2e-test.py'

# Check for required arguments
if [[ $# -ne 2 ]]; then
  echo "Usage: $0 <path_to_binary> <path_to_python_file>"
  exit 1
fi

BINARY_PATH=$1
PYTHON_FILE_EXECUTION_PATH=$2

rm -f /tmp/python-file-execution-res.log /tmp/server.log

# Pick a random port to reduce the chance of a collision
min=10000
max=11000
range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Point the bundled Python runtime at the engine directory, then install numpy
export PYTHONHOME=$(pwd)/engines/cortex.python/python/
export LD_LIBRARY_PATH="$PYTHONHOME:$LD_LIBRARY_PATH"
export DYLD_FALLBACK_LIBRARY_PATH="$PYTHONHOME:$DYLD_FALLBACK_LIBRARY_PATH"
echo "Set Python HOME to $PYTHONHOME"
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
./engines/cortex.python/python/bin/python3 -m ensurepip
./engines/cortex.python/python/bin/python3 -m pip install --upgrade pip
./engines/cortex.python/python/bin/python3 -m pip install numpy --target=$PYTHONHOME/lib/python/site-packages/

# Start the binary file
"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/server.log &

pid=$!

if ! ps -p $pid >/dev/null; then
  echo "server failed to start. Logs:"
  cat /tmp/server.log
  exit 1
fi

# Wait for a few seconds to let the server start
sleep 3

# Run the curl commands
response1=$(curl --connect-timeout 60 -o /tmp/python-file-execution-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/fine_tuning/job" \
  --header 'Content-Type: application/json' \
  --data '{
    "file_execution_path": "'$PYTHON_FILE_EXECUTION_PATH'"
  }')

error_occurred=0

# Verify the response
if [[ "$response1" -ne 200 ]]; then
echo "The python file execution curl command failed with status code: $response1"
cat /tmp/python-file-execution-res.log
error_occurred=1
fi

# Verify the output of the Python file in output.txt
OUTPUT_FILE="./output.txt"
EXPECTED_OUTPUT="1 2 3" # Expected contents of output.txt written by the test file

if [[ -f "$OUTPUT_FILE" ]]; then
  actual_output=$(cat "$OUTPUT_FILE")
  if [[ "$actual_output" != "$EXPECTED_OUTPUT" ]]; then
    echo "The output of the Python file does not match the expected output."
    echo "Expected: $EXPECTED_OUTPUT"
    echo "Actual: $actual_output"
    error_occurred=1
  else
    echo "The output of the Python file matches the expected output."
  fi
else
  echo "Output file $OUTPUT_FILE does not exist."
  error_occurred=1
fi


if [[ "$error_occurred" -eq 1 ]]; then
echo "Server test run failed!!!!!!!!!!!!!!!!!!!!!!"
echo "Server Error Logs:"
cat /tmp/server.log
kill $pid
echo "An error occurred while running the server."
exit 1
fi

echo "----------------------"
echo "Log server:"
cat /tmp/server.log

echo "Server test run successfully!"

# Kill the server process
kill $pid
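
For reference, the request this script issues can be reproduced by hand against a running server. A minimal sketch; the port (10500) and the file path below are example values, not anything fixed by this commit:

# Manual reproduction of the e2e request; 10500 and the script path are
# placeholder values chosen for illustration.
curl --location "http://127.0.0.1:10500/v1/fine_tuning/job" \
  --header 'Content-Type: application/json' \
  --data '{"file_execution_path": ".github/scripts/python-file-to-test.py"}'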
119 changes: 119 additions & 0 deletions .github/scripts/e2e-test-python-windows.bat
@@ -0,0 +1,119 @@
@echo off

setlocal enabledelayedexpansion

set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"

rem Check for required arguments
if "%~2"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<path_to_python_file^>
exit /b 1
)

set "BINARY_PATH=%~1"
set "PYTHON_FILE_EXECUTION_PATH=%~2"

for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

del %TEMP%\response1.log 2>nul
del %TEMP%\response1_code.log 2>nul
del %TEMP%\server.log 2>nul

set /a min=9999
set /a max=11000
set /a range=max-min+1
set /a PORT=%min% + %RANDOM% %% %range%

rem Install numpy for Python
set "PYTHONHOME=%cd%\engines\cortex.python\python"
echo Set Python HOME to %PYTHONHOME%
%PYTHONHOME%\python.exe -m ensurepip
%PYTHONHOME%\python.exe -m pip install --upgrade pip
%PYTHONHOME%\python.exe -m pip install numpy --target=%PYTHONHOME%\Lib\site-packages\

rem Start the binary file
start "" /B "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > "%TEMP%\server.log" 2>&1

rem Wait a few seconds for the server to start (ping doubles as a sleep)
ping -n 3 127.0.0.1 > nul

rem Capture the PID of the started binary
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
  set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
  echo server failed to start. Logs:
  type %TEMP%\server.log
  echo.
  exit /b 1
)

rem Define JSON strings for curl data
call set "PYTHON_FILE_EXECUTION_PATH_STRING=%%PYTHON_FILE_EXECUTION_PATH:\=\\%%"
set "curl_data1={\"file_execution_path\":\"%PYTHON_FILE_EXECUTION_PATH_STRING%\"}"

rem Print the values of curl_data for debugging
echo curl_data1=%curl_data1%

rem Run the curl command; the response body goes to response1.log via -o, and the
rem status code from -w goes to a separate file so the two do not overwrite each other
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/fine_tuning/job" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1

set "error_occurred=0"

rem Read the status code from the status file
set "response1="
for /f %%a in (%TEMP%\response1_code.log) do set "response1=%%a"

if "%response1%" neq "200" (
echo The first curl command failed with status code: %response1%
type %TEMP%\response1.log
echo.
set "error_occurred=1"
)

echo ----------------------
echo Log python file execution:
type %TEMP%\response1.log
echo.

rem Verification step: Check the contents of output.txt
set "expected_output=1 2 3"
set "actual_output="
if exist "output.txt" (
  for /f "delims=" %%x in (output.txt) do set "actual_output=%%x"
  if "!actual_output!"=="!expected_output!" (
    echo Verification succeeded: output.txt contains the expected data.
  ) else (
    echo Verification failed: output.txt does not contain the expected data.
    echo Expected: !expected_output!
    echo Actual: !actual_output!
    set "error_occurred=1"
  )
) else (
  echo Verification failed: output.txt does not exist.
  set "error_occurred=1"
)

echo ----------------------
echo Server logs:
type %TEMP%\server.log
echo.

if "%error_occurred%"=="1" (
echo Server test run failed!!!!!!!!!!!!!!!!!!!!!!
taskkill /f /pid %pid%
echo An error occurred while running the server.
exit /b 1
)

echo Server test run successfully!

rem Kill the server process
taskkill /f /im %BINARY_NAME% 2>nul || exit /B 0

endlocal
9 changes: 9 additions & 0 deletions .github/scripts/python-file-to-test.py
@@ -0,0 +1,9 @@
import sys

# Print the module search path for debugging
for path in sys.path:
    print(path)

import numpy as np
print("Numpy version: " + np.__version__)

# Write "1 2 3" to output.txt; the e2e scripts verify this content
with open('output.txt', 'w') as file:
    file.write(' '.join(map(str, np.array([1, 2, 3]))))
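
For local debugging, the same file can be run directly under the bundled interpreter that the e2e scripts configure; a sketch assuming the cortex.python engine has been unpacked into ./engines as in the Linux/macOS script above:

# Run the test file with the bundled interpreter (paths as configured in
# e2e-test-python-linux-and-mac.sh); output.txt should then contain "1 2 3".
export PYTHONHOME=$(pwd)/engines/cortex.python/python/
export LD_LIBRARY_PATH="$PYTHONHOME:$LD_LIBRARY_PATH"
./engines/cortex.python/python/bin/python3 .github/scripts/python-file-to-test.py
cat output.txt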
26 changes: 26 additions & 0 deletions .github/workflows/cortex-cpp-quality-gate.yml
@@ -12,6 +12,7 @@ on:
env:
  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
  EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
  PYTHON_FILE_EXECUTION_PATH: "python-file-to-test.py"

jobs:
  build-and-test:
@@ -26,107 +27,126 @@ jobs:
  runs-on: "ubuntu-18-04"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
  run-e2e: true
  run-python-e2e: true

- os: "linux"
  name: "amd64-avx"
  runs-on: "ubuntu-18-04"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-avx512"
  runs-on: "ubuntu-18-04"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-vulkan"
  runs-on: "ubuntu-18-04-cuda-11-7"
  cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-cuda-11-7"
  runs-on: "ubuntu-18-04-cuda-11-7"
  cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
  run-e2e: false
  run-python-e2e: false

- os: "linux"
  name: "amd64-cuda-12-0"
  runs-on: "ubuntu-18-04-cuda-12-0"
  cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
  run-e2e: false
  run-python-e2e: false

- os: "mac"
  name: "amd64"
  runs-on: "macos-13"
  cmake-flags: ""
  run-e2e: true
  run-python-e2e: true

- os: "mac"
  name: "arm64"
  runs-on: "mac-silicon"
  cmake-flags: "-DMAC_ARM64=ON"
  run-e2e: true
  run-python-e2e: true

- os: "windows"
  name: "amd64-avx2"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: true
  run-python-e2e: true

- os: "windows"
  name: "amd64-avx"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx512"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-vulkan"
  runs-on: "windows-latest"
  cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx2-cuda-12-0"
  runs-on: "windows-cuda-12-0"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx-cuda-12-0"
  runs-on: "windows-cuda-12-0"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx512-cuda-12-0"
  runs-on: "windows-cuda-12-0"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx2-cuda-11-7"
  runs-on: "windows-cuda-11-7"
  cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx-cuda-11-7"
  runs-on: "windows-cuda-11-7"
  cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

- os: "windows"
  name: "amd64-avx512-cuda-11-7"
  runs-on: "windows-cuda-11-7"
  cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
  run-e2e: false
  run-python-e2e: false

steps:
  - name: Clone
@@ -161,6 +181,12 @@ jobs:
    cd cortex-cpp
    make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
- name: Run python e2e testing
  if: ${{ matrix.run-python-e2e }}
  run: |
    cd cortex-cpp
    make run-python-e2e-test RUN_TESTS=true PYTHON_FILE_EXECUTION_PATH=${{ env.PYTHON_FILE_EXECUTION_PATH }}
- name: Upload Artifact
  uses: actions/upload-artifact@v2
  with:
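
The new CI step above mirrors the existing llama.cpp e2e step. To reproduce it locally, the same make target can presumably be invoked from the repository root; a sketch assuming the cortex-cpp Makefile defines run-python-e2e-test as referenced in the workflow (the Makefile change itself is among the files not shown in this excerpt):

# Mirrors the "Run python e2e testing" CI step; assumes the run-python-e2e-test
# Makefile target exists as referenced above.
cd cortex-cpp
make run-python-e2e-test RUN_TESTS=true PYTHON_FILE_EXECUTION_PATH=python-file-to-test.py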
3 changes: 3 additions & 0 deletions cortex-cpp/CMakeLists.txt
@@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.5)
project(cortex-cpp C CXX)

include(engines/cortex.llamacpp/engine.cmake)
if(NOT LLAMA_CUDA AND (LLAMA_AVX2 OR APPLE))
  include(engines/cortex.python/engine.cmake)
endif()
include(CheckIncludeFileCXX)

check_include_file_cxx(any HAS_ANY)
