feat: ship ONNX runtime on Windows #716

Merged · 3 commits · Jun 18, 2024
@@ -10,7 +10,8 @@ import { ModelNotFoundException } from '@/infrastructure/exception/model-not-fou
aliases: ['download'],
arguments: '<model_id>',
argsDescription: { model_id: 'Model repo to pull' },
description: 'Download a model. Working with HuggingFace model id.',
description:
'Download a model from a registry. Working with HuggingFace repositories. For available models, please visit https://huggingface.co/cortexhub',
})
@SetCommandContext()
export class ModelPullCommand extends CommandRunner {
119 changes: 117 additions & 2 deletions cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
@@ -1,4 +1,11 @@
import { createWriteStream, existsSync, rmSync } from 'fs';
import {
cpSync,
createWriteStream,
existsSync,
readdir,
readdirSync,
rmSync,
} from 'fs';
import { delimiter, join } from 'path';
import { HttpService } from '@nestjs/axios';
import { Presets, SingleBar } from 'cli-progress';
@@ -12,6 +19,7 @@ import { rm } from 'fs/promises';
import { exec } from 'child_process';
import { appPath } from '@/utils/app-path';
import {
CORTEX_ONNX_ENGINE_RELEASES_URL,
CORTEX_RELEASES_URL,
CUDA_DOWNLOAD_URL,
} from '@/infrastructure/constants/cortex';
@@ -59,7 +67,7 @@ export class InitCliUsecases {
exit(1);
}

console.log(`Downloading engine file ${engineFileName}`);
console.log(`Downloading Llama.cpp engine file ${engineFileName}`);
const dataFolderPath = await this.fileManagerService.getDataFolderPath();
const engineDir = join(dataFolderPath, 'cortex-cpp');
if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });
@@ -109,6 +117,9 @@
exit(1);
}
await rm(destination, { force: true });

// Ship ONNX Runtime on Windows by default
if (process.platform === 'win32') await this.installONNXEngine();
};

parseEngineFileName = (options?: InitOptions) => {
@@ -187,6 +198,7 @@
).replace('<platform>', platform);
const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz');

console.log('Downloading CUDA Toolkit dependency...');
const download = await firstValueFrom(
this.httpService.get(url, {
responseType: 'stream',
@@ -283,6 +295,109 @@ });
});
};

/**
* Download and install the ONNX engine
* @param version Engine release to install, or 'latest'
* @param engineFileName Release asset name to match, e.g. 'windows-amd64'
*/
async installONNXEngine(
version: string = 'latest',
engineFileName: string = 'windows-amd64',
) {
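// Resolve release metadata from GitHub: 'latest' uses the /releases/latest
// endpoint; otherwise the full release list is filtered by release name below.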
const res = await firstValueFrom(
this.httpService.get(
CORTEX_ONNX_ENGINE_RELEASES_URL +
`${version === 'latest' ? '/latest' : ''}`,
{
headers: {
'X-GitHub-Api-Version': '2022-11-28',
Accept: 'application/vnd.github+json',
},
},
),
);

if (!res?.data) {
console.log('Failed to fetch releases');
exit(1);
}

let release = res?.data;
if (Array.isArray(res?.data)) {
release = res.data.find(
(e) => e.name === version.replace('v', ''),
);
}
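// Pick the release asset that matches the requested platform build.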
const toDownloadAsset = release.assets.find((s: any) =>
s.name.includes(engineFileName),
);

if (!toDownloadAsset) {
console.log(`Could not find engine file ${engineFileName}`);
exit(1);
}

console.log(`Downloading ONNX engine file ${engineFileName}`);
const dataFolderPath = await this.fileManagerService.getDataFolderPath();
const engineDir = join(dataFolderPath, 'cortex-cpp');

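// Stream the selected asset to disk, reporting progress from content-length.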
const download = await firstValueFrom(
this.httpService.get(toDownloadAsset.browser_download_url, {
responseType: 'stream',
}),
);
if (!download) {
console.log('Failed to download engine');
process.exit(1);
}

const destination = join(dataFolderPath, toDownloadAsset.name);

await new Promise((resolve, reject) => {
const writer = createWriteStream(destination);
let receivedBytes = 0;
const totalBytes = download.headers['content-length'];

writer.on('finish', () => {
bar.stop();
resolve(true);
});

writer.on('error', (error) => {
bar.stop();
reject(error);
});

const bar = new SingleBar({}, Presets.shades_classic);
bar.start(100, 0);

download.data.on('data', (chunk: any) => {
receivedBytes += chunk.length;
bar.update(Math.floor((receivedBytes / totalBytes) * 100));
});

download.data.pipe(writer);
});

try {
await decompress(destination, join(engineDir, 'engines'));
} catch (e) {
console.error('Error decompressing file', e);
exit(1);
}
await rm(destination, { force: true });

// Copy the additional files to the cortex-cpp directory
for (const file of readdirSync(join(engineDir, 'engines', 'cortex.onnx'))) {
if (file !== 'engine.dll') {
cpSync(
join(engineDir, 'engines', 'cortex.onnx', file),
join(engineDir, file),
);
}
}
}

private checkFileExistenceInPaths = (
file: string,
paths: string[],
128 changes: 106 additions & 22 deletions cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
@@ -17,6 +17,8 @@ import { load } from 'js-yaml';
import { existsSync, readdirSync, readFileSync } from 'fs';
import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id';
import { getHFModelMetadata } from '@/utils/huggingface';
import { createWriteStream, mkdirSync, promises } from 'node:fs';
import { firstValueFrom } from 'rxjs';

@Injectable()
export class ModelsCliUsecases {
@@ -118,40 +120,116 @@ export class ModelsCliUsecases {
process.exit(1);
}

await this.pullHuggingFaceModel(modelId);
const bar = new SingleBar({}, Presets.shades_classic);
bar.start(100, 0);
const callback = (progress: number) => {
bar.update(progress);
};
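// Route by model id: ids containing 'onnx' are fetched file-by-file from
// cortexhub, everything else goes through the GGUF download flow.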
if (modelId.includes('onnx')) {
await this.pullOnnxModel(modelId);
} else {
await this.pullGGUFModel(modelId);
const bar = new SingleBar({}, Presets.shades_classic);
bar.start(100, 0);
const callback = (progress: number) => {
bar.update(progress);
};

try {
await this.modelsUsecases.downloadModel(modelId, callback);

const model = await this.modelsUsecases.findOne(modelId);
const fileUrl = join(
await this.fileService.getModelsPath(),
normalizeModelId(modelId),
basename((model?.files as string[])[0]),
);
await this.modelsUsecases.update(modelId, {
files: [fileUrl],
name: modelId.replace(':default', ''),
});
} catch (err) {
bar.stop();
throw err;
}
}
}

/**
* Pulls an ONNX model from the HuggingFace cortexhub repository
* @param modelId
*/
private async pullOnnxModel(modelId: string) {
const modelsContainerDir = await this.fileService.getModelsPath();

if (!existsSync(modelsContainerDir)) {
mkdirSync(modelsContainerDir, { recursive: true });
}

const modelFolder = join(modelsContainerDir, normalizeModelId(modelId));
await promises.mkdir(modelFolder, { recursive: true }).catch(() => {});

try {
await this.modelsUsecases.downloadModel(modelId, callback);
const files = [
'genai_config.json',
'model.onnx',
'model.onnx.data',
'model.yml',
'special_tokens_map.json',
'tokenizer.json',
'tokenizer_config.json',
];
const repo = modelId.split(':')[0];
const branch = modelId.split(':')[1] || 'default';
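// Model ids take the form '<repo>' or '<repo>:<branch>'; each ONNX artifact
// is downloaded individually from the cortexhub repository.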
for (const file of files) {
console.log(`Downloading ${file}`);
const bar = new SingleBar({}, Presets.shades_classic);
bar.start(100, 0);

const model = await this.modelsUsecases.findOne(modelId);
const fileUrl = join(
await this.fileService.getModelsPath(),
normalizeModelId(modelId),
basename((model?.files as string[])[0]),
const response = await firstValueFrom(
this.httpService.get(
`https://huggingface.co/cortexhub/${repo}/resolve/${branch}/${file}?download=true`,
{
responseType: 'stream',
},
),
);
await this.modelsUsecases.update(modelId, {
files: [fileUrl],
name: modelId.replace(':default', ''),
if (!response) {
throw new Error('Failed to download model');
}

await new Promise((resolve, reject) => {
const writer = createWriteStream(join(modelFolder, file));
let receivedBytes = 0;
const totalBytes = response.headers['content-length'];

writer.on('finish', () => {
resolve(true);
});

writer.on('error', (error) => {
reject(error);
});

response.data.on('data', (chunk: any) => {
receivedBytes += chunk.length;
bar.update(Math.floor((receivedBytes / totalBytes) * 100));
});

response.data.pipe(writer);
});
} catch (err) {
bar.stop();
throw err;
}
}

//// PRIVATE METHODS ////
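// Register the model from its model.yml manifest once all files are downloaded.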
const model: CreateModelDto = load(
readFileSync(join(modelFolder, 'model.yml'), 'utf-8'),
) as CreateModelDto;
model.files = [modelFolder];
model.model = modelId;

if (!(await this.modelsUsecases.findOne(modelId)))
await this.modelsUsecases.create(model);
}
/**
* Pulls a GGUF model from a HuggingFace repository
* It can be a model from Jan's repo or another author
* @param modelId HuggingFace model id, e.g. 'janhq/llama-3' or 'llama3:7b'
*/
private async pullHuggingFaceModel(modelId: string) {
private async pullGGUFModel(modelId: string) {
const data: HuggingFaceRepoData =
await this.modelsUsecases.fetchModelMetadata(modelId);

@@ -179,6 +257,7 @@ } else {
} else {
modelVersion = data.siblings.find((e) => e.rfilename.includes('.gguf'));
}

if (!modelVersion) throw 'No expected quantization found';
const metadata = await getHFModelMetadata(modelVersion.downloadUrl!);

@@ -203,12 +282,17 @@
// Default Model Settings
ctx_len: 4096,
ngl: 100,
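// ONNX models are served by the cortex.onnx engine; all others use llama.cpp.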
engine: 'cortex.llamacpp',
engine: modelId.includes('onnx') ? 'cortex.onnx' : 'cortex.llamacpp',
};
if (!(await this.modelsUsecases.findOne(modelId)))
await this.modelsUsecases.create(model);
}

/**
* Parse a preset file into model settings
* @param preset Preset name to look up in the presets folder
* @returns The parsed preset settings object
*/
private async parsePreset(preset?: string): Promise<object> {
const presetsFolder = await this.fileService.getPresetsPath();

3 changes: 3 additions & 0 deletions cortex-js/src/infrastructure/constants/cortex.ts
@@ -42,6 +42,9 @@ export const CORTEX_JS_STOP_API_SERVER_URL = (
export const CORTEX_RELEASES_URL =
'https://api.github.com/repos/janhq/cortex/releases';

export const CORTEX_ONNX_ENGINE_RELEASES_URL =
'https://api.github.com/repos/janhq/cortex.onnx/releases';

export const CUDA_DOWNLOAD_URL =
'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz';

2 changes: 1 addition & 1 deletion cortex-js/src/infrastructure/constants/huggingface.ts
@@ -2,7 +2,7 @@ export const HUGGING_FACE_TREE_REF_URL = (
repo: string,
tree: string,
path: string,
) => `https://huggingface.co/janhq/${repo}/resolve/${tree}/${path}`;
) => `https://huggingface.co/cortexhub/${repo}/resolve/${tree}/${path}`;

export const HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL = (
author: string,
@@ -15,7 +15,7 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file-

@Injectable()
export default class CortexProvider extends OAIEngineExtension {
provider: string = 'cortex.llamacpp';
provider: string = 'cortex';
apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`;

private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`;
@@ -11,7 +11,10 @@ import { existsSync } from 'fs';
@Injectable()
export class ExtensionRepositoryImpl implements ExtensionRepository {
// Initialize the Extensions Map with the key-value pairs of the core providers.
extensions = new Map<string, Extension>([['cortex', this.cortexProvider]]);
extensions = new Map<string, Extension>([
['cortex.llamacpp', this.cortexProvider],
['cortex.onnx', this.cortexProvider],
]);
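// Both engine keys resolve to the same core CortexProvider instance.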

constructor(
@Inject('CORTEX_PROVIDER')