Commit

chore: download model files
louis-jan committed Jun 25, 2024
1 parent d8dcd23 commit 3e3fe7d
Showing 4 changed files with 81 additions and 7 deletions.
@@ -75,7 +75,8 @@ export class InitCliUsecases {
process.exit(1);
}

- if (engine !== 'cortex.llamacpp') await this.installAcceleratedEngine();
+ if (engine !== 'cortex.llamacpp')
+   await this.installAcceleratedEngine('latest', engine);

configs.initialized = true;
await this.fileManagerService.writeConfigFile(configs);
@@ -348,7 +349,7 @@ export class InitCliUsecases {
exit(1);
}

- console.log(`Downloading ONNX engine file ${toDownloadAsset.name}`);
+ console.log(`Downloading engine file ${toDownloadAsset.name}`);
const dataFolderPath = await this.fileManagerService.getDataFolderPath();
const engineDir = join(dataFolderPath, 'cortex-cpp');

@@ -402,7 +403,7 @@ export class InitCliUsecases {
for (const file of readdirSync(join(engineDir, 'engines', engine))) {
if (file !== 'engine.dll') {
await cpSync(
- join(engineDir, 'engines', 'cortex.onnx', file),
+ join(engineDir, 'engines', engine, file),
join(engineDir, file),
);
}
50 changes: 50 additions & 0 deletions cortex-js/src/utils/cuda.ts
@@ -3,6 +3,13 @@ import { existsSync } from 'fs';
import { delimiter } from 'path';
import { checkFileExistenceInPaths } from './app-path';

export type GpuSettingInfo = {
  id: string;
  vram: string;
  name: string;
  arch?: string;
};

/**
* Return the CUDA version installed on the system
* @returns CUDA Version 11 | 12
@@ -63,3 +70,46 @@ export const checkNvidiaGPUExist = (): Promise<boolean> => {
});
});
};

/**
* Get GPU information from the system
* @returns GPU information
*/
export const getGpuInfo = async (): Promise<GpuSettingInfo[]> =>
  new Promise((resolve) => {
    exec(
      'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
      async (error, stdout) => {
        if (!error) {
          // Parse each GPU line, tracking the GPU with the most VRAM
          let highestVram = 0;
          let highestVramId = '0';
          const gpus: GpuSettingInfo[] = stdout
            .trim()
            .split('\n')
            .map((line) => {
              let [id, vram, name] = line.split(', ');
              const arch = getGpuArch(name);
              vram = vram.replace(/\r/g, '');
              if (parseFloat(vram) > highestVram) {
                highestVram = parseFloat(vram);
                highestVramId = id;
              }
              return { id, vram, name, arch };
            });

          resolve(gpus);
        } else {
          resolve([]);
        }
      },
    );
  });

const getGpuArch = (gpuName: string): string => {
  if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown';

  if (gpuName.includes('30')) return 'ampere';
  else if (gpuName.includes('40')) return 'ada';
  else return 'unknown';
};
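
For reference, a minimal usage sketch of the new helpers (not part of the commit; it assumes nvidia-smi is on the PATH, and the values shown are illustrative rather than real query output):

import { getGpuInfo } from './cuda';

(async () => {
  const gpus = await getGpuInfo();
  if (gpus.length === 0) {
    console.log('No NVIDIA GPU detected, or nvidia-smi is not available');
    return;
  }
  // Example shape (illustrative values); vram is reported in MiB because of the `nounits` flag:
  // [{ id: '0', vram: '24564', name: 'NVIDIA GeForce RTX 4090', arch: 'ada' }]
  console.log(gpus);
})();

Note that getGpuArch only distinguishes 30-series ('ampere') and 40-series ('ada') names and falls back to 'unknown' for everything else.
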
6 changes: 2 additions & 4 deletions cortex-js/src/utils/huggingface.ts
@@ -20,6 +20,7 @@ import {
} from '@/infrastructure/constants/prompt-constants';
import { gguf } from '@huggingface/gguf';
import axios from 'axios';
import { parseModelHubEngineBranch } from './normalize-model-id';

// TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider?
export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string {
@@ -64,7 +65,6 @@ export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string {
export async function fetchHuggingFaceRepoData(
repoId: string,
): Promise<HuggingFaceRepoData> {

const sanitizedUrl = getRepoModelsUrl(repoId);

const { data: response } = await axios.get(sanitizedUrl);
@@ -113,7 +113,7 @@ export async function fetchJanRepoData(
modelId: string,
): Promise<HuggingFaceRepoData> {
const repo = modelId.split(':')[0];
- const tree = modelId.split(':')[1] ?? 'default';
+ const tree = await parseModelHubEngineBranch(modelId.split(':')[1] ?? 'default');
const url = getRepoModelsUrl(`cortexhub/${repo}`, tree);

const res = await fetch(url);
@@ -164,8 +164,6 @@ export async function fetchJanRepoData(

data.modelUrl = url;



return data;
}
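
A hedged sketch of how a model id now resolves through this function (not part of the commit; 'llama3' is an illustrative id, and the relative import path is assumed):

import { fetchJanRepoData } from './huggingface';

(async () => {
  // Non-tensorrt branches such as ':default' pass through parseModelHubEngineBranch unchanged;
  // a branch like 'llama3:tensorrt-llm' would instead be expanded per platform and GPU (see below).
  const data = await fetchJanRepoData('llama3:default');
  console.log(data.modelUrl); // URL built by getRepoModelsUrl('cortexhub/llama3', 'default')
})();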

25 changes: 25 additions & 0 deletions cortex-js/src/utils/normalize-model-id.ts
@@ -1,4 +1,5 @@
import { ModelArtifact } from '@/domain/models/model.interface';
import { getGpuInfo } from './cuda';

export const normalizeModelId = (modelId: string): string => {
return modelId.replace(':default', '').replace(/[:/]/g, '-');
@@ -13,3 +14,27 @@ export const isLocalModel = (
!/^(http|https):\/\/[^/]+\/.*/.test(modelFiles[0])
);
};

/**
 * Parse the model hub engine branch
 * @param branch the requested engine branch
 * @returns the branch name, suffixed with the platform and GPU architecture for tensorrt branches
 */
export const parseModelHubEngineBranch = async (
  branch: string,
): Promise<string> => {
  if (branch.includes('tensorrt')) {
    let engineBranch = branch;
    const platform = process.platform == 'win32' ? 'windows' : 'linux';
    if (!engineBranch.includes(platform)) {
      engineBranch += `-${platform}`;
    }

    const gpus = await getGpuInfo();
    if (gpus[0]?.arch && !engineBranch.includes(gpus[0].arch)) {
      engineBranch += `-${gpus[0].arch}`;
    }
    return engineBranch;
  }
  return branch;
};
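
A short sketch of the expansion behaviour (not part of the commit; the expected output assumes a Linux host where getGpuInfo reports a 40-series GPU, so the detected arch is 'ada'):

import { parseModelHubEngineBranch } from './normalize-model-id';

(async () => {
  // Branches without 'tensorrt' are returned unchanged.
  console.log(await parseModelHubEngineBranch('default')); // 'default'

  // TensorRT branches get the platform and, when detected, the GPU architecture appended:
  console.log(await parseModelHubEngineBranch('tensorrt-llm')); // 'tensorrt-llm-linux-ada' on the assumed host
})();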
