Skip to content

Commit

Permalink
feat: check CUDA version for TensorRT-LLM models
Browse files Browse the repository at this point in the history
  • Loading branch information
marknguyen1302 authored Jul 4, 2024
1 parent a543315 commit 5859371
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ export class ModelPullCommand extends CommandRunner {
}
const modelId = passedParams[0];

checkModelCompatibility(modelId);
await checkModelCompatibility(modelId);

await this.modelsCliUsecases.pullModel(modelId).catch((e: Error) => {
if (e instanceof ModelNotFoundException)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ export class ModelStartCommand extends CommandRunner {
process.exit(1);
}

checkModelCompatibility(modelId);
await checkModelCompatibility(modelId);
checkingSpinner.succeed('Model found');

const engine = existingModel.engine || Engines.llamaCPP;
// Pull engine if not exist
if (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ export class RunCommand extends CommandRunner {
checkingSpinner.succeed('Model found');

// Check model compatibility on this machine
checkModelCompatibility(modelId);
await checkModelCompatibility(modelId);

const engine = existingModel.engine || Engines.llamaCPP;
// Pull engine if not exist
if (
Expand Down
2 changes: 2 additions & 0 deletions cortex-js/src/infrastructure/constants/cortex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ export const CUDA_DOWNLOAD_URL =
'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz';

export const telemetryServerUrl = 'https://telemetry.jan.ai';

// Minimum CUDA driver version (major.minor) required for TensorRT-LLM models;
// compared against the version parsed from `nvidia-smi` output.
export const MIN_CUDA_VERSION = '12.3';
28 changes: 28 additions & 0 deletions cortex-js/src/utils/cuda.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export type GpuSettingInfo = {
* @returns CUDA Version 11 | 12
*/
export const cudaVersion = async () => {

let filesCuda12: string[];
let filesCuda11: string[];
let paths: string[];
Expand Down Expand Up @@ -71,6 +72,33 @@ export const checkNvidiaGPUExist = (): Promise<boolean> => {
});
};

/**
 * Reads the CUDA driver version reported by `nvidia-smi`.
 *
 * @returns the version string parsed from the banner line, e.g. '12.3'
 * @throws rejects with an Error when `nvidia-smi` cannot be executed
 *         (driver missing / not on PATH) or its output contains no
 *         "CUDA Version: <major>.<minor>" token
 */
export const getCudaVersion = (): Promise<string> => {
  return new Promise<string>((resolve, reject) => {
    // nvidia-smi prints a banner containing "CUDA Version: <major>.<minor>"
    exec('nvidia-smi', (error, stdout) => {
      if (error) {
        // Command itself failed — propagate the exec error.
        reject(error);
        return;
      }
      // Match directly on the full output; this collapses the previous
      // duplicated "line found but no match" / "line not found" branches.
      const cudaVersionMatch = stdout.match(/CUDA Version:\s+(\d+\.\d+)/);
      if (cudaVersionMatch) {
        resolve(cudaVersionMatch[1]);
      } else {
        // Reject with an Error (not a bare string) so callers can rely
        // on `e.message` being present.
        reject(new Error('CUDA Version not found.'));
      }
    });
  });
};

/**
* Get GPU information from the system
* @returns GPU information
Expand Down
28 changes: 24 additions & 4 deletions cortex-js/src/utils/model-check.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
export const checkModelCompatibility = (modelId: string) => {
import { MIN_CUDA_VERSION } from "@/infrastructure/constants/cortex";
import { getCudaVersion } from "./cuda";

export const checkModelCompatibility = async (modelId: string) => {
if (modelId.includes('onnx') && process.platform !== 'win32') {
console.error('The ONNX engine does not support this OS yet.');
process.exit(1);
}

if (modelId.includes('tensorrt-llm') && process.platform === 'darwin') {
console.error('Tensorrt-LLM models are not supported on this OS');
process.exit(1);
if (modelId.includes('tensorrt-llm') ) {
if(process.platform === 'darwin'){
console.error('Tensorrt-LLM models are not supported on this OS');
process.exit(1);
}

try{
const version = await getCudaVersion();
const [currentMajor, currentMinor] = version.split('.').map(Number);
const [requiredMajor, requiredMinor] = MIN_CUDA_VERSION.split('.').map(Number);
const isMatchRequired = currentMajor > requiredMajor || (currentMajor === requiredMajor && currentMinor >= requiredMinor);
if (!isMatchRequired) {
console.error(`CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`);
process.exit(1);
}
} catch (e) {
console.error(e.message ?? e);
process.exit(1);
}

}
};

0 comments on commit 5859371

Please sign in to comment.