Skip to content

Commit

Permalink
chore: update path for tensorrt engine
Browse files Browse the repository at this point in the history
Signed-off-by: James <[email protected]>
  • Loading branch information
James committed Mar 17, 2024
1 parent 6fb647a commit 41d8f2c
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 9 deletions.
2 changes: 2 additions & 0 deletions extensions/tensorrt-llm-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
"0.1.0"
]
},
"tensorrtVersion": "0.1.6",
"provider": "nitro-tensorrt-llm",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
Expand Down
7 changes: 5 additions & 2 deletions extensions/tensorrt-llm-extension/rollup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ export default [
plugins: [
replace({
EXTENSION_NAME: JSON.stringify(packageJson.name),
TENSORRT_VERSION: JSON.stringify('0.1.6'),
TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
PROVIDER: JSON.stringify(packageJson.provider),
DOWNLOAD_RUNNER_URL:
process.platform === 'darwin' || process.platform === 'win32'
process.platform === 'win32'
? JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
)
Expand Down Expand Up @@ -53,6 +54,8 @@ export default [
plugins: [
replace({
EXTENSION_NAME: JSON.stringify(packageJson.name),
TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
PROVIDER: JSON.stringify(packageJson.provider),
LOAD_MODEL_URL: JSON.stringify(
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
),
Expand Down
1 change: 1 addition & 0 deletions extensions/tensorrt-llm-extension/src/@types/global.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ declare const DOWNLOAD_RUNNER_URL: string
declare const TENSORRT_VERSION: string
declare const COMPATIBILITY: object
declare const EXTENSION_NAME: string
declare const PROVIDER: string
7 changes: 4 additions & 3 deletions extensions/tensorrt-llm-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
* Override custom function name for loading and unloading model
* Which are implemented from node module
*/
override provider = 'nitro-tensorrt-llm'
override provider = PROVIDER
override inferenceUrl = INFERENCE_URL
override nodeModule = NODE

Expand Down Expand Up @@ -86,12 +86,13 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
}

const janDataFolderPath = await getJanDataFolderPath()
const extensionName = EXTENSION_NAME
const engineVersion = TENSORRT_VERSION

const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
extensionName,
this.provider,
engineVersion,
firstGpu.arch,
])

Expand Down
16 changes: 12 additions & 4 deletions extensions/tensorrt-llm-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
 * Initializes an engine subprocess to load a machine learning model.
* @param params - The model load settings.
*/
async function loadModel(params: any, systemInfo?: SystemInformation): Promise<{ error: Error | undefined }> {
async function loadModel(
params: any,
systemInfo?: SystemInformation
): Promise<{ error: Error | undefined }> {
// modelFolder is the absolute path to the running model folder
// e.g. ~/jan/models/llama-2
let modelFolder = params.modelFolder
Expand Down Expand Up @@ -73,7 +76,10 @@ function unloadModel(): Promise<any> {
* 2. Load model into engine subprocess
* @returns
*/
async function runEngineAndLoadModel(settings: ModelLoadParams, systemInfo: SystemInformation) {
async function runEngineAndLoadModel(
settings: ModelLoadParams,
systemInfo: SystemInformation
) {
return unloadModel()
.then(() => runEngine(systemInfo))
.then(() => loadModelRequest(settings))
Expand Down Expand Up @@ -150,15 +156,17 @@ async function runEngine(systemInfo: SystemInformation): Promise<void> {
)
}
const janDataFolderPath = await getJanDataFolderPath()
const extensionName = EXTENSION_NAME
const tensorRtVersion = TENSORRT_VERSION
const provider = PROVIDER

return new Promise<void>((resolve, reject) => {
// Current directory by default

const executableFolderPath = path.join(
janDataFolderPath,
'engines',
extensionName,
provider,
tensorRtVersion,
gpuArch
)
const nitroExecutablePath = path.join(
Expand Down

0 comments on commit 41d8f2c

Please sign in to comment.