diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json index af1dba8539..ec54a82c15 100644 --- a/extensions/tensorrt-llm-extension/package.json +++ b/extensions/tensorrt-llm-extension/package.json @@ -18,6 +18,8 @@ "0.1.0" ] }, + "tensorrtVersion": "0.1.6", + "provider": "nitro-tensorrt-llm", "scripts": { "build": "tsc --module commonjs && rollup -c rollup.config.ts", "build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", diff --git a/extensions/tensorrt-llm-extension/rollup.config.ts b/extensions/tensorrt-llm-extension/rollup.config.ts index 75027e0109..ee8d050d3f 100644 --- a/extensions/tensorrt-llm-extension/rollup.config.ts +++ b/extensions/tensorrt-llm-extension/rollup.config.ts @@ -16,9 +16,10 @@ export default [ plugins: [ replace({ EXTENSION_NAME: JSON.stringify(packageJson.name), - TENSORRT_VERSION: JSON.stringify('0.1.6'), + TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion), + PROVIDER: JSON.stringify(packageJson.provider), DOWNLOAD_RUNNER_URL: - process.platform === 'darwin' || process.platform === 'win32' + process.platform === 'win32' ? JSON.stringify( 'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v/nitro-windows-v-amd64-tensorrt-llm-.tar.gz' ) @@ -53,6 +54,8 @@ export default [ plugins: [ replace({ EXTENSION_NAME: JSON.stringify(packageJson.name), + TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion), + PROVIDER: JSON.stringify(packageJson.provider), LOAD_MODEL_URL: JSON.stringify( `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel` ), diff --git a/extensions/tensorrt-llm-extension/src/@types/global.d.ts b/extensions/tensorrt-llm-extension/src/@types/global.d.ts index 905e86380a..9cf5b60900 100644 --- a/extensions/tensorrt-llm-extension/src/@types/global.d.ts +++ b/extensions/tensorrt-llm-extension/src/@types/global.d.ts @@ -8,3 +8,4 @@ declare const DOWNLOAD_RUNNER_URL: string declare const TENSORRT_VERSION: string declare const COMPATIBILITY: object declare const EXTENSION_NAME: string +declare const PROVIDER: string diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts index dd120c3e0f..de548c0f15 100644 --- a/extensions/tensorrt-llm-extension/src/index.ts +++ b/extensions/tensorrt-llm-extension/src/index.ts @@ -35,7 +35,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { * Override custom function name for loading and unloading model * Which are implemented from node module */ - override provider = 'nitro-tensorrt-llm' + override provider = PROVIDER override inferenceUrl = INFERENCE_URL override nodeModule = NODE @@ -86,12 +86,13 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { } const janDataFolderPath = await getJanDataFolderPath() - const extensionName = EXTENSION_NAME + const engineVersion = TENSORRT_VERSION const executableFolderPath = await joinPath([ janDataFolderPath, 'engines', - extensionName, + this.provider, + engineVersion, firstGpu.arch, ]) diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index e40e62130d..3766b5524c 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -27,7 +27,10 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined * Initializes a engine subprocess to load a machine learning model. * @param params - The model load settings. */ -async function loadModel(params: any, systemInfo?: SystemInformation): Promise<{ error: Error | undefined }> { +async function loadModel( + params: any, + systemInfo?: SystemInformation +): Promise<{ error: Error | undefined }> { // modelFolder is the absolute path to the running model folder // e.g. ~/jan/models/llama-2 let modelFolder = params.modelFolder @@ -73,7 +76,10 @@ function unloadModel(): Promise { * 2. Load model into engine subprocess * @returns */ -async function runEngineAndLoadModel(settings: ModelLoadParams, systemInfo: SystemInformation) { +async function runEngineAndLoadModel( + settings: ModelLoadParams, + systemInfo: SystemInformation +) { return unloadModel() .then(() => runEngine(systemInfo)) .then(() => loadModelRequest(settings)) @@ -150,7 +156,8 @@ async function runEngine(systemInfo: SystemInformation): Promise { ) } const janDataFolderPath = await getJanDataFolderPath() - const extensionName = EXTENSION_NAME + const tensorRtVersion = TENSORRT_VERSION + const provider = PROVIDER return new Promise((resolve, reject) => { // Current directory by default @@ -158,7 +165,8 @@ async function runEngine(systemInfo: SystemInformation): Promise { const executableFolderPath = path.join( janDataFolderPath, 'engines', - extensionName, + provider, + tensorRtVersion, gpuArch ) const nitroExecutablePath = path.join(