diff --git a/.github/workflows/nightly-integrate-cortex-cpp.yml b/.github/workflows/nightly-integrate-cortex-cpp.yml
index e0b48bc46f..8ddc40a118 100644
--- a/.github/workflows/nightly-integrate-cortex-cpp.yml
+++ b/.github/workflows/nightly-integrate-cortex-cpp.yml
@@ -51,13 +51,13 @@ jobs:
           latest_prerelease_asset_count=$(get_asset_count "$latest_prerelease_name")

           if [ "$current_version_name" = "$latest_prerelease_name" ]; then
-            echo "cortex cpp remote repo doesn't have update today, skip update cortex-cpp for today nightly build"
+            echo "cortex.cpp remote repo has no update today; skipping the cortex.cpp update for today's nightly build"
             echo "::set-output name=pr_created::false"
             exit 0
           fi

           if [ "$current_version_asset_count" != "$latest_prerelease_asset_count" ]; then
-            echo "Latest prerelease version has different number of assets, somethink went wrong, skip update cortex-cpp for today nightly build"
+            echo "Latest prerelease version has a different number of assets; something went wrong, skipping the cortex.cpp update for today's nightly build"
             echo "::set-output name=pr_created::false"
             exit 1
           fi
diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts
index 8665850da8..d1a23dca90 100644
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@@ -1,31 +1,13 @@
-import fs from 'fs'
 import { join } from 'path'
-import {
-  getJanDataFolderPath,
-  getJanExtensionsPath,
-  getSystemResourceInfo,
-  log,
-} from '../../../helper'
-import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
-import { Model, ModelSettingParams, PromptTemplate } from '../../../../types'
-import {
-  LOCAL_HOST,
-  NITRO_DEFAULT_PORT,
-  NITRO_HTTP_KILL_URL,
-  NITRO_HTTP_LOAD_MODEL_URL,
-  NITRO_HTTP_VALIDATE_MODEL_URL,
-  SUPPORTED_MODEL_FORMAT,
-} from './consts'
-
-// The subprocess instance for Nitro
-let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
-
-// TODO: move this to core type
-interface NitroModelSettings extends ModelSettingParams {
-  llama_model_path: string
-  cpu_threads: number
-}
+import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
+import { ModelSettingParams } from '../../../../types'

+/**
+ * Start a model
+ * @param modelId
+ * @param settingParams
+ * @returns
+ */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
   try {
     await runModel(modelId, settingParams)
@@ -40,316 +22,57 @@ export const startModel = async (modelId: string, settingParams?: ModelSettingPa
   }
 }

-const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
+/**
+ * Run a model using the installed cortex extension
+ * @param model
+ * @param settingParams
+ */
+const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
   const janDataFolderPath = getJanDataFolderPath()
-  const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
-
-  if (!fs.existsSync(modelFolderFullPath)) {
-    throw new Error(`Model not found: ${modelId}`)
-  }
-
-  const files: string[] = fs.readdirSync(modelFolderFullPath)
-
-  // Look for GGUF model file
-  const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
-
-  const modelMetadataPath = join(modelFolderFullPath, 'model.json')
-  const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
-
-  if (!ggufBinFile) {
-    throw new Error('No GGUF model file found')
-  }
-  const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
-
-  const nitroResourceProbe = await getSystemResourceInfo()
-  const nitroModelSettings: NitroModelSettings = {
-    // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
-    ...modelMetadata.settings,
-    ...settingParams,
-    llama_model_path: modelBinaryPath,
-    ...(modelMetadata.settings.mmproj && {
-      mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
-    }),
-  }
-
-  log(`[SERVER]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
-
-  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
-  if (modelMetadata.settings.prompt_template) {
-    const promptTemplate = modelMetadata.settings.prompt_template
-    const prompt = promptTemplateConverter(promptTemplate)
-    if (prompt?.error) {
-      throw new Error(prompt.error)
-    }
-    nitroModelSettings.system_prompt = prompt.system_prompt
-    nitroModelSettings.user_prompt = prompt.user_prompt
-    nitroModelSettings.ai_prompt = prompt.ai_prompt
-  }
-
-  await runNitroAndLoadModel(modelId, nitroModelSettings)
-}
-
-// TODO: move to util
-const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
-  // Split the string using the markers
-  const systemMarker = '{system_message}'
-  const promptMarker = '{prompt}'
-
-  if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
-    // Find the indices of the markers
-    const systemIndex = promptTemplate.indexOf(systemMarker)
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-
-    // Extract the parts of the string
-    const system_prompt = promptTemplate.substring(0, systemIndex)
-    const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
-    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt }
-  } else if (promptTemplate.includes(promptMarker)) {
-    // Extract the parts of the string for the case where only promptMarker is present
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-    const user_prompt = promptTemplate.substring(0, promptIndex)
-    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
-
-    // Return the split parts
-    return { user_prompt, ai_prompt }
-  }
-
-  // Return an error if none of the conditions are met
-  return { error: 'Cannot split prompt template' }
-}
-
-const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
-  // Gather system information for CPU physical cores and memory
-  const tcpPortUsed = require('tcp-port-used')
-
-  await stopModel(modelId)
-  await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
-
-  /**
-   * There is a problem with Windows process manager
-   * Should wait for awhile to make sure the port is free and subprocess is killed
-   * The tested threshold is 500ms
-   **/
-  if (process.platform === 'win32') {
-    await new Promise((resolve) => setTimeout(resolve, 500))
-  }
-
-  await spawnNitroProcess()
-  await loadLLMModel(modelSettings)
-  await validateModelStatus()
-}
-
-const spawnNitroProcess = async (): Promise<void> => {
-  log(`[SERVER]::Debug: Spawning cortex subprocess...`)
-
-  let binaryFolder = join(
+  const modelFolder = join(janDataFolderPath, 'models', model)
+  let module = join(
     getJanExtensionsPath(),
     '@janhq',
     'inference-cortex-extension',
     'dist',
-    'bin'
-  )
-
-  let executableOptions = executableNitroFile()
-  const tcpPortUsed = require('tcp-port-used')
-
-  const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
-  // Execute the binary
-  log(
-    `[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
+    'node',
+    'index.cjs'
   )
-  subprocess = spawn(
-    executableOptions.executablePath,
-    ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
-    {
-      cwd: binaryFolder,
-      env: {
-        ...process.env,
-        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
-      },
-    }
+  // Reuse the cortex extension implementation rather than duplicating it here, so the two cannot drift out of sync
+  return import(module).then((extension) =>
+    extension
+      .loadModel(
+        {
+          modelFolder,
+          model,
+        },
+        settingParams
+      )
+      .then(() => log(`[SERVER]::Debug: Model is loaded`))
+      .then(() => ({
+        message: 'Model started',
+      }))
   )
-
-  // Handle subprocess output
-  subprocess.stdout.on('data', (data: any) => {
-    log(`[SERVER]::Debug: ${data}`)
-  })
-
-  subprocess.stderr.on('data', (data: any) => {
-    log(`[SERVER]::Error: ${data}`)
-  })
-
-  subprocess.on('close', (code: any) => {
-    log(`[SERVER]::Debug: cortex exited with code: ${code}`)
-    subprocess = undefined
-  })
-
-  tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
-    log(`[SERVER]::Debug: cortex is ready`)
-  })
-}
-
-type NitroExecutableOptions = {
-  executablePath: string
-  cudaVisibleDevices: string
 }
-
-const executableNitroFile = (): NitroExecutableOptions => {
-  const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
-  let binaryFolder = join(
+/**
+ * Stop a model via the installed cortex extension.
+ */
+export const stopModel = async (_modelId: string) => {
+  let module = join(
     getJanExtensionsPath(),
     '@janhq',
     'inference-cortex-extension',
     'dist',
-    'bin'
+    'node',
+    'index.cjs'
   )
-
-  let cudaVisibleDevices = ''
-  let binaryName = 'cortex-cpp'
-  /**
-   * The binary folder is different for each platform.
-   */
-  if (process.platform === 'win32') {
-    /**
-     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
-     */
-    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
-    if (nvidiaInfo['run_mode'] === 'cpu') {
-      binaryFolder = join(binaryFolder, 'win-cpu')
-    } else {
-      if (nvidiaInfo['cuda'].version === '12') {
-        binaryFolder = join(binaryFolder, 'win-cuda-12-0')
-      } else {
-        binaryFolder = join(binaryFolder, 'win-cuda-11-7')
-      }
-      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
-    }
-    binaryName = 'cortex-cpp.exe'
-  } else if (process.platform === 'darwin') {
-    /**
-     * For MacOS: mac-universal both Silicon and InteL
-     */
-    if(process.arch === 'arm64') {
-      binaryFolder = join(binaryFolder, 'mac-arm64')
-    } else {
-      binaryFolder = join(binaryFolder, 'mac-amd64')
-    }
-  } else {
-    /**
-     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
-     */
-    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
-    if (nvidiaInfo['run_mode'] === 'cpu') {
-      binaryFolder = join(binaryFolder, 'linux-cpu')
-    } else {
-      if (nvidiaInfo['cuda'].version === '12') {
-        binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
-      } else {
-        binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
-      }
-      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
-    }
-  }
-
-  return {
-    executablePath: join(binaryFolder, binaryName),
-    cudaVisibleDevices,
-  }
-}
-
-const validateModelStatus = async (): Promise<void> => {
-  // Send a GET request to the validation URL.
-  // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
-  const fetchRT = require('fetch-retry')
-  const fetchRetry = fetchRT(fetch)
-
-  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
-    method: 'GET',
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    retries: 5,
-    retryDelay: 500,
-  }).then(async (res: Response) => {
-    log(`[SERVER]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
-    // If the response is OK, check model_loaded status.
-    if (res.ok) {
-      const body = await res.json()
-      // If the model is loaded, return an empty object.
-      // Otherwise, return an object with an error message.
-      if (body.model_loaded) {
-        return Promise.resolve()
-      }
-    }
-    return Promise.reject('Validate model status failed')
-  })
-}
-
-const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
-  log(`[SERVER]::Debug: Loading model with params ${JSON.stringify(settings)}`)
-  const fetchRT = require('fetch-retry')
-  const fetchRetry = fetchRT(fetch)
-
-  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    body: JSON.stringify(settings),
-    retries: 3,
-    retryDelay: 500,
-  })
-    .then((res: any) => {
-      log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
-      return Promise.resolve(res)
-    })
-    .catch((err: any) => {
-      log(`[SERVER]::Error: Load model failed with error ${err}`)
-      return Promise.reject(err)
-    })
-}
-
-/**
- * Stop model and kill nitro process.
- */
-export const stopModel = async (_modelId: string) => {
-  if (!subprocess) {
-    return {
-      error: "Model isn't running",
-    }
-  }
-  return new Promise((resolve, reject) => {
-    const controller = new AbortController()
-    setTimeout(() => {
-      controller.abort()
-      reject({
-        error: 'Failed to stop model: Timedout',
-      })
-    }, 5000)
-    const tcpPortUsed = require('tcp-port-used')
-    log(`[SERVER]::Debug: Request to kill cortex`)
-
-    fetch(NITRO_HTTP_KILL_URL, {
-      method: 'DELETE',
-      signal: controller.signal,
-    })
-      .then(() => {
-        subprocess?.kill()
-        subprocess = undefined
-      })
-      .catch(() => {
-        // don't need to do anything, we still kill the subprocess
-      })
-      .then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
-      .then(() => log(`[SERVER]::Debug: Nitro process is terminated`))
-      .then(() =>
-        resolve({
-          message: 'Model stopped',
-        })
-      )
-  })
+  // Reuse the cortex extension implementation rather than duplicating it here, so the two cannot drift out of sync
+  return import(module).then((extension) =>
+    extension
+      .unloadModel()
+      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
+      .then(() => ({
+        message: 'Model stopped',
+      }))
+  )
 }
diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat
index b7fbd32520..7acd385d53 100644
--- a/extensions/inference-nitro-extension/download.bat
+++ b/extensions/inference-nitro-extension/download.bat
@@ -1,3 +1,31 @@
 @echo off
+set BIN_PATH=./bin
 set /p CORTEX_VERSION=<./bin/version.txt
-.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx.tar.gz -e --strip 1 -o ./bin/win-cpu/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan/engines/cortex.llamacpp
+
+@REM Download cortex.llamacpp binaries
+set VERSION=v0.1.25
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
+set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
+
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx2/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx512/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/win-vulkan/engines/cortex.llamacpp
+
+@REM Loop through each folder and move DLLs (excluding engine.dll)
+for %%F in (%SUBFOLDERS%) do (
+    echo Processing folder: %BIN_PATH%\%%F
+
+    @REM Move all .dll files except engine.dll
+    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
+        if /I not "%%~nxD"=="engine.dll" (
+            move "%%D" "%BIN_PATH%"
+        )
+    )
+)
+
+echo DLL files moved successfully.
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/download.sh b/extensions/inference-nitro-extension/download.sh
new file mode 100755
index 0000000000..98ed8504a4
--- /dev/null
+++ b/extensions/inference-nitro-extension/download.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Read CORTEX_VERSION
+CORTEX_VERSION=$(cat ./bin/version.txt)
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+
+# Detect platform
+OS_TYPE=$(uname)
+
+if [ "$OS_TYPE" == "Linux" ]; then
+    # Linux downloads
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
+    chmod +x "./bin/cortex-cpp"
+
+    ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64"
+
+    # Download engines for Linux
+    download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
+
+elif [ "$OS_TYPE" == "Darwin" ]; then
+    # macOS downloads
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1
+    chmod +x "./bin/mac-arm64/cortex-cpp"
+    chmod +x "./bin/mac-x64/cortex-cpp"
+
+    ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac"
+    # Download engines for macOS
+    download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
+
+else
+    echo "Unsupported operating system: $OS_TYPE"
+    exit 1
+fi
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 7be4be69a1..1e3ea6d381 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -2,7 +2,7 @@
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
   "version": "1.0.15",
-  "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
+  "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies can be installed to run without a CUDA Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
   "author": "Jan <service@jan.ai>",
@@ -10,13 +10,11 @@
   "scripts": {
     "test": "jest",
     "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-    "downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx.tar.gz -e --strip 1 -o ./bin/linux-cpu/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan/engines/cortex.llamacpp",
-    "downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-amd64/engines/cortex.llamacpp",
+    "downloadnitro:linux:darwin": "./download.sh",
     "downloadnitro:win32": "download.bat",
     "downloadnitro": "run-script-os",
     "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:win32": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", "build:publish": "yarn test && run-script-os" }, "exports": { @@ -49,6 +47,7 @@ }, "dependencies": { "@janhq/core": "file:../../core", + "cpu-instructions": "^0.0.13", "decompress": "^4.2.1", "fetch-retry": "^5.0.6", "rxjs": "^7.8.1", @@ -68,6 +67,7 @@ "tcp-port-used", "fetch-retry", "@janhq/core", - "decompress" + "decompress", + "cpu-instructions" ] } diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts index fdd11f961a..4e1731a095 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-nitro-extension/rollup.config.ts @@ -96,7 +96,7 @@ export default [ llama3170bJson, gemma22bJson, gemma29bJson, - gemma227bJson + gemma227bJson, ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), @@ -117,7 +117,10 @@ export default [ // Allow json resolution json(), // Compile TypeScript files - typescript({ useTsconfigDeclarationDir: true }), + typescript({ + useTsconfigDeclarationDir: true, + exclude: ['**/__tests__', '**/*.test.ts'], + }), // Compile TypeScript files // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) commonjs(), @@ -139,7 +142,7 @@ export default [ { file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true }, ], // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') - external: ['@janhq/core/node'], + external: ['@janhq/core/node', 'cpu-instructions'], watch: { include: 'src/node/**', }, @@ -147,7 +150,10 @@ export default [ // Allow json resolution json(), // Compile TypeScript files - typescript({ useTsconfigDeclarationDir: true }), + typescript({ + useTsconfigDeclarationDir: true, + exclude: ['**/__tests__', '**/*.test.ts'], + }), // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) commonjs(), // Allow node_modules resolution, so you can use 'external' to control @@ -156,7 +162,6 @@ export default [ resolve({ extensions: ['.ts', '.js', '.json'], }), - // Resolve source maps to the original source sourceMaps(), ], diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index a027e88449..d79e076d4e 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -73,6 +73,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine { this.registerModels(models) super.onLoad() + // Add additional dependencies PATH to the env executeOnMain(NODE, 'addAdditionalDependencies', { name: this.name, version: this.version, diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-nitro-extension/src/node/execute.test.ts index cf9e84acf7..dfd8b35a96 100644 --- a/extensions/inference-nitro-extension/src/node/execute.test.ts +++ b/extensions/inference-nitro-extension/src/node/execute.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it } from '@jest/globals' import { 
 import { executableNitroFile } from './execute'
 import { GpuSetting } from '@janhq/core'
-import { sep } from 'path'
+import { cpuInfo } from 'cpu-instructions'

 let testSettings: GpuSetting = {
   run_mode: 'cpu',
@@ -22,6 +22,14 @@
 }
 const originalPlatform = process.platform

+jest.mock('cpu-instructions', () => ({
+  cpuInfo: {
+    cpuInfo: jest.fn(),
+  },
+}))
+let mock = cpuInfo.cpuInfo as jest.Mock
+mock.mockReturnValue([])
+
 describe('test executable nitro file', () => {
   afterAll(function () {
     Object.defineProperty(process, 'platform', {
@@ -38,17 +46,19 @@
     })
     expect(executableNitroFile(testSettings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
+        enginePath: expect.stringContaining(`mac-arm64`),
+        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
     Object.defineProperty(process, 'arch', {
-      value: 'amd64',
+      value: 'x64',
     })
     expect(executableNitroFile(testSettings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
+        enginePath: expect.stringContaining(`mac-x64`),
+        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
@@ -62,14 +72,11 @@
     const settings: GpuSetting = {
       ...testSettings,
       run_mode: 'cpu',
-      cuda: {
-        exist: true,
-        version: '11',
-      },
     }
     expect(executableNitroFile(settings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`win`),
+        executablePath: expect.stringContaining(`cortex-cpp.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
@@ -102,7 +109,8 @@
     }
     expect(executableNitroFile(settings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`win-cuda-11-7`),
+        executablePath: expect.stringContaining(`cortex-cpp.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
@@ -135,7 +143,8 @@
     }
     expect(executableNitroFile(settings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`win-cuda-12-0`),
+        executablePath: expect.stringContaining(`cortex-cpp.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
@@ -152,7 +161,8 @@
     }
     expect(executableNitroFile(settings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
+        enginePath: expect.stringContaining(`linux`),
+        executablePath: expect.stringContaining(`cortex-cpp`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
@@ -185,7 +195,8 @@
     }
     expect(executableNitroFile(settings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
+        enginePath: expect.stringContaining(`linux-cuda-11-7`),
+        executablePath: expect.stringContaining(`cortex-cpp`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
@@ -218,10 +229,203 @@
     }
     expect(executableNitroFile(settings)).toEqual(
       expect.objectContaining({
-        executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
+        enginePath: expect.stringContaining(`linux-cuda-12-0`),
+        executablePath: expect.stringContaining(`cortex-cpp`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
   })
+
+  // Generate test for different cpu instructions on Linux
+  it(`executes on Linux CPU with different instructions`, () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'linux',
+    })
+    const settings: GpuSetting = {
+      ...testSettings,
+      run_mode: 'cpu',
+    }
+
+    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
+    cpuInstructions.forEach((instruction) => {
+      mock.mockReturnValue([instruction])
+
+      expect(executableNitroFile(settings)).toEqual(
+        expect.objectContaining({
+          enginePath: expect.stringContaining(`linux-${instruction}`),
+          executablePath: expect.stringContaining(`cortex-cpp`),
+          cudaVisibleDevices: '',
+          vkVisibleDevices: '',
+        })
+      )
+    })
+  })
+  // Generate test for different cpu instructions on Windows
+  it(`executes on Windows CPU with different instructions`, () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'win32',
+    })
+    const settings: GpuSetting = {
+      ...testSettings,
+      run_mode: 'cpu',
+    }
+    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
+    cpuInstructions.forEach((instruction) => {
+      mock.mockReturnValue([instruction])
+      expect(executableNitroFile(settings)).toEqual(
+        expect.objectContaining({
+          enginePath: expect.stringContaining(`win-${instruction}`),
+          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          cudaVisibleDevices: '',
+          vkVisibleDevices: '',
+        })
+      )
+    })
+  })
+
+  // Generate test for different cpu instructions on Windows
+  it(`executes on Windows GPU with different instructions`, () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'win32',
+    })
+    const settings: GpuSetting = {
+      ...testSettings,
+      run_mode: 'gpu',
+      cuda: {
+        exist: true,
+        version: '12',
+      },
+      nvidia_driver: {
+        exist: true,
+        version: '12',
+      },
+      gpus_in_use: ['0'],
+      gpus: [
+        {
+          id: '0',
+          name: 'NVIDIA GeForce GTX 1080',
+          vram: '80000000',
+        },
+      ],
+    }
+    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
+    cpuInstructions.forEach((instruction) => {
+      mock.mockReturnValue([instruction])
+      expect(executableNitroFile(settings)).toEqual(
+        expect.objectContaining({
+          enginePath: expect.stringContaining(`win-cuda-12-0`),
+          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          cudaVisibleDevices: '0',
+          vkVisibleDevices: '0',
+        })
+      )
+    })
+  })
+
+  // Generate test for different cpu instructions on Linux
+  it(`executes on Linux GPU with different instructions`, () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'linux',
+    })
+    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
+    const settings: GpuSetting = {
+      ...testSettings,
+      run_mode: 'gpu',
+      cuda: {
+        exist: true,
+        version: '12',
+      },
+      nvidia_driver: {
+        exist: true,
+        version: '12',
+      },
+      gpus_in_use: ['0'],
+      gpus: [
+        {
+          id: '0',
+          name: 'NVIDIA GeForce GTX 1080',
+          vram: '80000000',
+        },
+      ],
+    }
+    cpuInstructions.forEach((instruction) => {
+      mock.mockReturnValue([instruction])
+      expect(executableNitroFile(settings)).toEqual(
+        expect.objectContaining({
+          enginePath: expect.stringContaining(`linux-cuda-12-0`),
+          executablePath: expect.stringContaining(`cortex-cpp`),
+          cudaVisibleDevices: '0',
+          vkVisibleDevices: '0',
+        })
+      )
+    })
+  })
+
+  // Vulkan on Linux should take precedence over the detected CPU instructions
+  it(`executes on Linux Vulkan should not have CPU instructions included`, () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'linux',
+    })
+    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
+    const settings: GpuSetting = {
+      ...testSettings,
+      run_mode: 'gpu',
+      vulkan: true,
+      cuda: {
+        exist: true,
+        version: '12',
+      },
+      nvidia_driver: {
+        exist: true,
+        version: '12',
+      },
+      gpus_in_use: ['0'],
+      gpus: [
+        {
+          id: '0',
+          name: 'NVIDIA GeForce GTX 1080',
+          vram: '80000000',
+        },
+      ],
+    }
+    cpuInstructions.forEach((instruction) => {
+      mock.mockReturnValue([instruction])
+      expect(executableNitroFile(settings)).toEqual(
+        expect.objectContaining({
+          enginePath: expect.stringContaining(`linux-vulkan`),
+          executablePath: expect.stringContaining(`cortex-cpp`),
+          cudaVisibleDevices: '0',
+          vkVisibleDevices: '0',
+        })
+      )
+    })
+  })
+
+  // CPU instructions should be ignored on MacOS
+  it(`executes on MacOS with different instructions`, () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'darwin',
+    })
+    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
+    cpuInstructions.forEach(() => {
+      Object.defineProperty(process, 'platform', {
+        value: 'darwin',
+      })
+      const settings: GpuSetting = {
+        ...testSettings,
+        run_mode: 'cpu',
+      }
+      mock.mockReturnValue([])
+      expect(executableNitroFile(settings)).toEqual(
+        expect.objectContaining({
+          enginePath: expect.stringContaining(`mac-x64`),
+          executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+          cudaVisibleDevices: '',
+          vkVisibleDevices: '',
+        })
+      )
+    })
+  })
 })
diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts
index 417734afa7..595063ed48 100644
--- a/extensions/inference-nitro-extension/src/node/execute.ts
+++ b/extensions/inference-nitro-extension/src/node/execute.ts
@@ -1,37 +1,59 @@
 import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
+import { cpuInfo } from 'cpu-instructions'

 export interface NitroExecutableOptions {
+  enginePath: string
   executablePath: string
   cudaVisibleDevices: string
   vkVisibleDevices: string
 }

-const runMode = (settings?: GpuSetting): string => {
+/**
+ * The GPU run mode that will be set - either 'vulkan', 'cuda', or empty for CPU.
+ * @param settings
+ * @returns
+ */
+const gpuRunMode = (settings?: GpuSetting): string => {
   if (process.platform === 'darwin')
     // MacOS now has universal binaries
     return ''

-  if (!settings) return 'cpu'
+  if (!settings) return ''

   return settings.vulkan === true
     ? 'vulkan'
     : settings.run_mode === 'cpu'
-      ? 'cpu'
+      ? ''
       : 'cuda'
 }

+/**
+ * The OS & architecture that the current process is running on.
+ * @returns win, mac-x64, mac-arm64, or linux
+ */
 const os = (): string => {
   return process.platform === 'win32'
     ? 'win'
     : process.platform === 'darwin'
-      ? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
+      ? process.arch === 'arm64'
+        ? 'mac-arm64'
+        : 'mac-x64'
       : 'linux'
 }

+/**
+ * The executable file extension for the current platform.
+ * @returns .exe if on Windows, otherwise an empty string.
+ */
 const extension = (): '.exe' | '' => {
   return process.platform === 'win32' ? '.exe' : ''
 }

+/**
+ * The CUDA version that will be set - either '11-7' or '12-0'.
+ * @param settings
+ * @returns
+ */
 const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
   const isUsingCuda =
     settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'

@@ -40,6 +62,21 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
   return settings?.cuda?.version === '11' ? '11-7' : '12-0'
 }

+/**
+ * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
+ * @returns
+ */
+const cpuInstructions = () => {
+  if (process.platform === 'darwin') return ''
+  return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
+    ? 'avx512'
+    : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
+      ? 'avx2'
+      : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
+        ? 'avx'
+        : 'noavx'
+}
+
 /**
  * Find which executable file to run based on the current platform.
  * @returns The name of the executable file to run.
@@ -47,15 +84,26 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
 export const executableNitroFile = (
   gpuSetting?: GpuSetting
 ): NitroExecutableOptions => {
-  let binaryFolder = [os(), runMode(gpuSetting), cudaVersion(gpuSetting)]
+  let engineFolder = [
+    os(),
+    ...(gpuSetting?.vulkan
+      ? []
+      : [
+          gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
+          gpuRunMode(gpuSetting),
+          cudaVersion(gpuSetting),
+        ]),
+    gpuSetting?.vulkan ? 'vulkan' : undefined,
+  ]
     .filter((e) => !!e)
     .join('-')

   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-  let binaryName = `cortex-cpp${extension()}`
+  let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
   return {
-    executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
+    enginePath: path.join(__dirname, '..', 'bin', engineFolder),
+    executablePath: path.join(__dirname, '..', 'bin', binaryName),
     cudaVisibleDevices,
     vkVisibleDevices,
   }
diff --git a/extensions/inference-nitro-extension/src/node/index.test.ts b/extensions/inference-nitro-extension/src/node/index.test.ts
new file mode 100644
index 0000000000..6e64b4a060
--- /dev/null
+++ b/extensions/inference-nitro-extension/src/node/index.test.ts
@@ -0,0 +1,465 @@
+jest.mock('fetch-retry', () => ({
+  default: () => () => {
+    return Promise.resolve({
+      ok: true,
+      status: 200,
+      json: () =>
+        Promise.resolve({
+          model_loaded: true,
+        }),
+      text: () => Promise.resolve(''),
+    })
+  },
+}))
+
+jest.mock('path', () => ({
+  default: {
+    isAbsolute: jest.fn(),
+    join: jest.fn(),
+    parse: () => {
+      return { dir: 'dir' }
+    },
+    delimiter: { concat: () => '' },
+  },
+}))
+
+jest.mock('decompress', () => ({
+  default: () => {
+    return Promise.resolve()
+  },
+}))
+
+jest.mock('@janhq/core/node', () => ({
+  ...jest.requireActual('@janhq/core/node'),
+  getJanDataFolderPath: () => '',
+  getSystemResourceInfo: () => {
+    return {
+      cpu: {
+        cores: 1,
+        logicalCores: 1,
+        threads: 1,
+        model: 'model',
+        speed: 1,
+      },
+      memory: {
+        total: 1,
+        free: 1,
+      },
+      gpu: {
+        model: 'model',
+        memory: 1,
+        cuda: {
+          version: 'version',
+          devices: 'devices',
+        },
+        vulkan: {
+          version: 'version',
+          devices: 'devices',
+        },
+      },
+    }
+  },
+}))
+
+jest.mock('fs', () => ({
+  default: {
+    readdirSync: () => [],
+  },
+}))
+
+jest.mock('child_process', () => ({
+  exec: () => {
+    return {
+      stdout: { on: jest.fn() },
+      stderr: { on: jest.fn() },
+      on: jest.fn(),
+    }
+  },
+  spawn: () => {
+    return {
+      stdout: { on: jest.fn() },
+      stderr: { on: jest.fn() },
+      on: jest.fn(),
+      pid: '111',
+    }
+  },
+}))
+
+jest.mock('tcp-port-used', () => ({
+  default: {
+    waitUntilFree: () => Promise.resolve(true),
+    waitUntilUsed: () => Promise.resolve(true),
+  },
+}))
+
+jest.mock('./execute', () => ({
+  executableNitroFile: () => {
+    return {
+      enginePath: 'enginePath',
+      executablePath: 'executablePath',
+      cudaVisibleDevices: 'cudaVisibleDevices',
+      vkVisibleDevices: 'vkVisibleDevices',
+    }
+  },
+}))
+
+jest.mock('terminate', () => ({
+  default: (id: String, func: Function) => {
+    console.log(id)
+    func()
+  },
+}))
+
+import * as execute from './execute'
+import index from './index'
+
+let executeMock = execute
+
+const modelInitOptions: any = {
+  modelFolder: '/path/to/model',
+  model: {
+    id: 'test',
+    name: 'test',
+    engine: 'nitro',
+    version: '0.0',
+    format: 'GGUF',
+    object: 'model',
+    sources: [],
+    created: 0,
+    description: 'test',
+    parameters: {},
+    metadata: {
+      author: '',
+      tags: [],
+      size: 0,
+    },
+    settings: {
+      prompt_template: '{prompt}',
+      llama_model_path: 'model.gguf',
+    },
+  },
+}
+
+describe('loadModel', () => {
+  it('should load a model successfully', async () => {
+    // Mock the necessary parameters and system information
+
+    const systemInfo = {
+      // Mock the system information if needed
+    }
+
+    // Call the loadModel function
+    const result = await index.loadModel(modelInitOptions, systemInfo)
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+
+  it('should reject with an error message if the model is not a nitro model', async () => {
+    // Mock the necessary parameters and system information
+
+    const systemInfo = {
+      // Mock the system information if needed
+    }
+    modelInitOptions.model.engine = 'not-nitro'
+    // Call the loadModel function
+    try {
+      await index.loadModel(modelInitOptions, systemInfo)
+    } catch (error) {
+      // Assert that the error message is as expected
+      expect(error).toBe('Not a cortex model')
+    }
+    modelInitOptions.model.engine = 'nitro'
+  })
+
+  it('should reject if model load failed with an error message', async () => {
+    // Mock the necessary parameters and system information
+
+    const systemInfo = {
+      // Mock the system information if needed
+    }
+    // Mock the fetch-retry module to return a failed response
+    jest.mock('fetch-retry', () => ({
+      default: () => () => {
+        return Promise.resolve({
+          ok: false,
+          status: 500,
+          json: () =>
+            Promise.resolve({
+              model_loaded: false,
+            }),
+          text: () => Promise.resolve('Failed to load model'),
+        })
+      },
+    }))
+
+    // Call the loadModel function
+    try {
+      await index.loadModel(modelInitOptions, systemInfo)
+    } catch (error) {
+      // Assert that the error message is as expected
+      expect(error).toBe('Failed to load model')
+    }
+  })
+
+  it('should reject if port not available', async () => {
+    // Mock the necessary parameters and system information
+
+    const systemInfo = {
+      // Mock the system information if needed
+    }
+
+    // Mock the tcp-port-used module to return false
+    jest.mock('tcp-port-used', () => ({
+      default: {
+        waitUntilFree: () => Promise.resolve(false),
+        waitUntilUsed: () => Promise.resolve(false),
+      },
+    }))
+
+    // Call the loadModel function
+    try {
+      await index.loadModel(modelInitOptions, systemInfo)
+    } catch (error) {
+      // Assert that the error message is as expected
+      expect(error).toBe('Port not available')
+    }
+  })
+
+  it('should run on GPU model if ngl is set', async () => {
+    const systemInfo: any = {
+      gpuSetting: {
+        run_mode: 'gpu',
+      },
+    }
+    // Spy executableNitroFile
+    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
+      enginePath: '',
+      executablePath: '',
+      cudaVisibleDevices: '',
+      vkVisibleDevices: '',
+    })
+
+    Object.defineProperty(process, 'platform', { value: 'win32' })
+    await index.loadModel(
+      {
+        ...modelInitOptions,
+        model: {
+          ...modelInitOptions.model,
+          settings: {
+            ...modelInitOptions.model.settings,
+            ngl: 40,
+          },
+        },
+      },
+      systemInfo
+    )
+    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
+      run_mode: 'gpu',
+    })
+  })
+
+  it('should run on correct CPU instructions if ngl is not set', async () => {
+    const systemInfo: any = {
+      gpuSetting: {
+        run_mode: 'gpu',
+      },
+    }
+    // Spy executableNitroFile
+    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
+      enginePath: '',
+      executablePath: '',
+      cudaVisibleDevices: '',
+      vkVisibleDevices: '',
+    })
+
+    Object.defineProperty(process, 'platform', { value: 'win32' })
+    await index.loadModel(
+      {
+        ...modelInitOptions,
+        model: {
+          ...modelInitOptions.model,
+          settings: {
+            ...modelInitOptions.model.settings,
+            ngl: undefined,
+          },
+        },
+      },
+      systemInfo
+    )
+    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
+      run_mode: 'cpu',
+    })
+  })
+
+  it('should run on correct CPU instructions if ngl is 0', async () => {
+    const systemInfo: any = {
+      gpuSetting: {
+        run_mode: 'gpu',
+      },
+    }
+    // Spy executableNitroFile
+    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
+      enginePath: '',
+      executablePath: '',
+      cudaVisibleDevices: '',
+      vkVisibleDevices: '',
+    })
+
+    Object.defineProperty(process, 'platform', { value: 'win32' })
+    await index.loadModel(
+      {
+        ...modelInitOptions,
+        model: {
+          ...modelInitOptions.model,
+          settings: {
+            ...modelInitOptions.model.settings,
+            ngl: 0,
+          },
+        },
+      },
+      systemInfo
+    )
+    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
+      run_mode: 'cpu',
+    })
+  })
+})
+
+describe('unloadModel', () => {
+  it('should unload a model successfully', async () => {
+    // Call the unloadModel function
+    const result = await index.unloadModel()
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+
+  it('should reject with an error message if the model is not a nitro model', async () => {
+    // Call the unloadModel function
+    try {
+      await index.unloadModel()
+    } catch (error) {
+      // Assert that the error message is as expected
+      expect(error).toBe('Not a cortex model')
+    }
+  })
+
+  it('should reject if model unload failed with an error message', async () => {
+    // Mock the fetch-retry module to return a failed response
+    jest.mock('fetch-retry', () => ({
+      default: () => () => {
+        return Promise.resolve({
+          ok: false,
+          status: 500,
+          json: () =>
+            Promise.resolve({
+              model_unloaded: false,
+            }),
+          text: () => Promise.resolve('Failed to unload model'),
+        })
+      },
+    }))
+
+    // Call the unloadModel function
+    try {
+      await index.unloadModel()
+    } catch (error) {
+      // Assert that the error message is as expected
+      expect(error).toBe('Failed to unload model')
+    }
+  })
+
+  it('should reject if port not available', async () => {
+    // Mock the tcp-port-used module to return false
+    jest.mock('tcp-port-used', () => ({
+      default: {
+        waitUntilFree: () => Promise.resolve(false),
+        waitUntilUsed: () => Promise.resolve(false),
+      },
+    }))
+
+    // Call the unloadModel function
+    try {
+      await index.unloadModel()
+    } catch (error) {
+      // Assert that the error message is as expected
+      expect(error).toBe('Port not available')
+    }
+  })
+})
+describe('dispose', () => {
+  it('should dispose a model successfully on Mac', async () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'darwin',
+    })
+
+    // Call the dispose function
+    const result = await index.dispose()
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+
+  it('should kill the subprocess successfully on Windows', async () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'win32',
+    })
+
+    // Call the killSubprocess function
+    const result = await index.dispose()
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+})
+
+describe('getCurrentNitroProcessInfo', () => {
+  it('should return the current nitro process info', async () => {
+    // Call the getCurrentNitroProcessInfo function
+    const result = await index.getCurrentNitroProcessInfo()
+
+    // Assert that the result is as expected
+    expect(result).toEqual({
+      isRunning: true,
+    })
+  })
+})
+
+describe('decompressRunner', () => {
+  it('should decompress the runner successfully', async () => {
+    jest.mock('decompress', () => ({
+      default: () => {
+        return Promise.resolve()
+      },
+    }))
+    // Call the decompressRunner function
+    const result = await index.decompressRunner('', '')
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+  it('should not reject if decompression failed', async () => {
+    jest.mock('decompress', () => ({
+      default: () => {
+        return Promise.reject('Failed to decompress')
+      },
+    }))
+    // Call the decompressRunner function
+    const result = await index.decompressRunner('', '')
+    expect(result).toBeUndefined()
+  })
+})
+
+describe('addAdditionalDependencies', () => {
+  it('should add additional dependencies successfully', async () => {
+    // Call the addAdditionalDependencies function
+    const result = await index.addAdditionalDependencies({
+      name: 'name',
+      version: 'version',
+    })
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+})
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 77ac9af7a0..edc2d013de 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -263,10 +263,10 @@ async function validateModelStatus(modelId: string): Promise<void> {
   log(`[CORTEX]::Debug: Validating model ${modelId}`)
   return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
     method: 'POST',
-    body: JSON.stringify({ 
+    body: JSON.stringify({
       model: modelId,
       // TODO: force to use cortex llamacpp by default
-      engine: 'cortex.llamacpp'
+      engine: 'cortex.llamacpp',
     }),
     headers: {
       'Content-Type': 'application/json',
@@ -365,14 +365,37 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
   log(`[CORTEX]::Debug: Spawning cortex subprocess...`)

   return new Promise(async (resolve, reject) => {
-    let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
+    let executableOptions = executableNitroFile(
+      // If ngl is not set or equal to 0, run on CPU with correct instructions
+      systemInfo?.gpuSetting
+        ? {
+            ...systemInfo.gpuSetting,
+            run_mode:
+              currentSettings?.ngl === undefined || currentSettings.ngl === 0
+                ? 'cpu'
+                : systemInfo.gpuSetting.run_mode,
+          }
+        : undefined
+    )
     const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
     // Execute the binary
     log(
       `[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
     )
-    log(path.parse(executableOptions.executablePath).dir)
+    log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
+
+    // Add engine path to the PATH and LD_LIBRARY_PATH
+    process.env.PATH = (process.env.PATH || '').concat(
+      path.delimiter,
+      executableOptions.enginePath
+    )
+    log(`[CORTEX] PATH: ${process.env.PATH}`)
+    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
+      path.delimiter,
+      executableOptions.enginePath
+    )
+
     subprocess = spawn(
       executableOptions.executablePath,
       ['1', LOCAL_HOST, PORT.toString()],
@@ -380,6 +403,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
         cwd: path.join(path.parse(executableOptions.executablePath).dir),
         env: {
           ...process.env,
+          ENGINE_PATH: executableOptions.enginePath,
           CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
           // Vulkan - Support 1 device at a time for now
           ...(executableOptions.vkVisibleDevices?.length > 0 && {
@@ -440,12 +464,19 @@ const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
 }

 const addAdditionalDependencies = (data: { name: string; version: string }) => {
+  log(
+    `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
+  )
   const additionalPath = path.delimiter.concat(
     path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
   )
   // Set the updated PATH
-  process.env.PATH = (process.env.PATH || '').concat(additionalPath)
+  process.env.PATH = (process.env.PATH || '').concat(
+    path.delimiter,
+    additionalPath
+  )
   process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
+    path.delimiter,
     additionalPath
   )
 }
diff --git a/extensions/inference-nitro-extension/tsconfig.json b/extensions/inference-nitro-extension/tsconfig.json
index bada43fc7b..19d8572b51 100644
--- a/extensions/inference-nitro-extension/tsconfig.json
+++ b/extensions/inference-nitro-extension/tsconfig.json
@@ -15,5 +15,6 @@
     "importHelpers": true,
     "typeRoots": ["node_modules/@types"]
   },
-  "include": ["src"]
+  "include": ["src"],
+  "exclude": ["src/**/*.test.ts"]
 }
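
---

Reviewer note: for clarity, here is a minimal TypeScript sketch of the CPU-instruction-based engine selection this patch introduces. The `cpuInfo.cpuInfo()` call and the avx512 > avx2 > avx > noavx precedence are taken from `execute.ts` above; the `instructionSuffix` helper name and the sample folder names in comments are illustrative only, not part of the patch.

    import { cpuInfo } from 'cpu-instructions'

    // Illustrative helper: collapse the detected instruction-set flags into the
    // single suffix used in the engine folder name, preferring the widest set.
    const instructionSuffix = (): string => {
      const flags = cpuInfo.cpuInfo().map((flag) => flag.toUpperCase())
      if (flags.includes('AVX512')) return 'avx512'
      if (flags.includes('AVX2')) return 'avx2'
      if (flags.includes('AVX')) return 'avx'
      return 'noavx'
    }

    // Engine folder names the selection scheme then produces, per the tests above:
    //   linux, CPU only   -> linux-avx2        (on an AVX2-capable host)
    //   linux, CUDA 12    -> linux-cuda-12-0   (CPU suffix skipped when CUDA is used)
    //   win,   Vulkan     -> win-vulkan        (CPU suffix skipped when Vulkan is used)
    //   darwin            -> mac-arm64 / mac-x64 (no suffix; universal engines per arch)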