diff --git a/cortex-js/src/index.ts b/cortex-js/src/index.ts index 3c8a23ed5..13a383371 100644 --- a/cortex-js/src/index.ts +++ b/cortex-js/src/index.ts @@ -1,4 +1,5 @@ import { + CORTEX_CPP_PROCESS_DESTROY_URL, CORTEX_JS_SYSTEM_URL, defaultCortexJsHost, defaultCortexJsPort, @@ -36,5 +37,12 @@ export async function start(host?: string, port?: number) { export async function stop(host?: string, port?: number) { return fetch(CORTEX_JS_SYSTEM_URL(host, port), { method: 'DELETE', - }).catch(() => {}); + }) + .catch(() => {}) + .then(() => + fetch(CORTEX_CPP_PROCESS_DESTROY_URL(host, port), { + method: 'DELETE', + }), + ) + .catch(() => {}); } diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index cc3cd6315..08162de34 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -1,4 +1,8 @@ -import { HttpStatus, Injectable } from '@nestjs/common'; +import { + BeforeApplicationShutdown, + HttpStatus, + Injectable, +} from '@nestjs/common'; import { ChildProcess, fork } from 'child_process'; import { delimiter, join } from 'path'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; @@ -16,7 +20,7 @@ import { import { openSync } from 'fs'; @Injectable() -export class CortexUsecases { +export class CortexUsecases implements BeforeApplicationShutdown { private cortexProcess: ChildProcess | undefined; constructor( @@ -44,6 +48,9 @@ export class CortexUsecases { const dataFolderPath = await this.fileManagerService.getDataFolderPath(); const writer = openSync(await this.fileManagerService.getLogPath(), 'a+'); + + // Attempt to stop the process if it's already running + await this.stopCortex(); // go up one level to get the binary folder, have to also work on windows this.cortexProcess = fork(join(__dirname, './../../utils/cortex-cpp'), [], { detached: true, @@ -103,8 +110,6 @@ export class CortexUsecases { ), ), ); - } catch (err) { - console.error(err.response.data); } finally { this.cortexProcess?.kill(); return { @@ -211,4 +216,9 @@ export class CortexUsecases { cortexCppPort: port, }); } + + async beforeApplicationShutdown(signal: string) { + console.log(`Received ${signal}, performing pre-shutdown tasks.`); + await this.stopCortex(); + } } diff --git a/cortex-js/src/usecases/engines/engines.usecase.ts b/cortex-js/src/usecases/engines/engines.usecase.ts index 8315cd29e..8e15278ae 100644 --- a/cortex-js/src/usecases/engines/engines.usecase.ts +++ b/cortex-js/src/usecases/engines/engines.usecase.ts @@ -120,10 +120,11 @@ export class EnginesUsecases { } if ( - (engine === Engines.llamaCPP || engine === Engines.tensorrtLLM) && - options?.runMode === 'GPU' && - options?.gpuType === 'Nvidia' && - !options?.vulkan + engine === Engines.tensorrtLLM || + (engine === Engines.llamaCPP && + options?.runMode === 'GPU' && + options?.gpuType === 'Nvidia' && + !options?.vulkan) ) await this.installCudaToolkitDependency( engine === Engines.tensorrtLLM