From cf35eec3a05f4e3f215d91f0ebd0d7f132b16e1f Mon Sep 17 00:00:00 2001 From: James Date: Fri, 17 May 2024 10:16:32 +0700 Subject: [PATCH 1/2] chore: fix local path --- cortex-js/src/command.module.ts | 2 +- .../commanders/models/model-get.command.ts | 1 - .../commanders/models/model-list.command.ts | 1 - .../commanders/models/model-pull.command.ts | 5 +- .../types/init-options.interface.ts | 12 +- .../usecases/cli.usecases.module.ts | 22 +- .../commanders/usecases/init.cli.usecases.ts | 414 +++++++++--------- .../usecases/models.cli.usecases.ts | 11 +- .../providers/cortex/cortex.provider.ts | 2 +- 9 files changed, 241 insertions(+), 229 deletions(-) diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index 4627852f1..d15758746 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -35,7 +35,7 @@ import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usec ChatModule, ExtensionModule, HttpModule, - CliUsecasesModule + CliUsecasesModule, ], providers: [ CortexCommand, diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts index 850e738c0..15136adc6 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts @@ -1,4 +1,3 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { exit } from 'node:process'; diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts index a0c17fcfe..6e491fc8d 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts @@ -1,4 +1,3 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index eae732704..4b8bc2952 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -24,10 +24,7 @@ export class ModelPullCommand extends CommandRunner { const callback = (progress: number) => { bar.update(progress); }; - await this.modelsCliUsecases.pullModel( - input[0], - callback, - ); + await this.modelsCliUsecases.pullModel(input[0], callback); console.log('\nDownload complete!'); exit(0); } diff --git a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts index f65e4c7cd..24d460bbb 100644 --- a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts +++ b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts @@ -1,7 +1,7 @@ export interface InitOptions { - runMode?: 'CPU' | 'GPU'; - gpuType?: 'Nvidia' | 'Others (Vulkan)'; - instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; - cudaVersion?: '11' | '12'; - installCuda?: 'Yes' | string -} \ No newline at end of file + runMode?: 'CPU' | 'GPU'; + gpuType?: 'Nvidia' | 'Others (Vulkan)'; + instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; + cudaVersion?: '11' | '12'; + installCuda?: 'Yes' | string; +} diff --git a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts index a01216c8c..a82b60dd0 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts @@ -1,13 +1,13 @@ -import { Module } from "@nestjs/common"; -import { InitCliUsecases } from "./init.cli.usecases"; -import { HttpModule } from "@nestjs/axios"; -import { ModelsCliUsecases } from "./models.cli.usecases"; -import { ModelsModule } from "@/usecases/models/models.module"; +import { Module } from '@nestjs/common'; +import { InitCliUsecases } from './init.cli.usecases'; +import { HttpModule } from '@nestjs/axios'; +import { ModelsCliUsecases } from './models.cli.usecases'; +import { ModelsModule } from '@/usecases/models/models.module'; @Module({ - imports: [HttpModule, ModelsModule], - controllers: [], - providers: [InitCliUsecases, ModelsCliUsecases], - exports: [InitCliUsecases, ModelsCliUsecases], - }) - export class CliUsecasesModule {} \ No newline at end of file + imports: [HttpModule, ModelsModule], + controllers: [], + providers: [InitCliUsecases, ModelsCliUsecases], + exports: [InitCliUsecases, ModelsCliUsecases], +}) +export class CliUsecasesModule {} diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts index 0b9ba2e8e..47456e4f9 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts @@ -9,211 +9,225 @@ import { Injectable } from '@nestjs/common'; @Injectable() export class InitCliUsecases { - CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases'; - CUDA_DOWNLOAD_URL = 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz' + CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases'; + CUDA_DOWNLOAD_URL = + 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz'; + + constructor(private readonly httpService: HttpService) {} + + installEngine = async ( + engineFileName: string, + version: string = 'latest', + ): Promise => { + const res = await this.httpService + .get( + this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`, + { + headers: { + 'X-GitHub-Api-Version': '2022-11-28', + Accept: 'application/vnd.github+json', + }, + }, + ) + .toPromise(); + + if (!res?.data) { + console.log('Failed to fetch releases'); + exit(1); + } + + let release = res?.data; + if (Array.isArray(res?.data)) { + release = Array(res?.data)[0].find( + (e) => e.name === version.replace('v', ''), + ); + } + const toDownloadAsset = release.assets.find((s: any) => + s.name.includes(engineFileName), + ); - constructor( - private readonly httpService: HttpService, - ) { + if (!toDownloadAsset) { + console.log(`Could not find engine file ${engineFileName}`); + exit(1); } - installEngine = async ( - engineFileName: string, - version: string = 'latest', - ): Promise => { - const res = await this.httpService - .get( - this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`, - { - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - Accept: 'application/vnd.github+json', - }, - }, - ) - .toPromise(); - - if (!res?.data) { - console.log('Failed to fetch releases'); - exit(1); - } - - let release = res?.data; - if (Array.isArray(res?.data)) { - release = Array(res?.data)[0].find( - (e) => e.name === version.replace('v', ''), - ); - } - const toDownloadAsset = release.assets.find((s: any) => - s.name.includes(engineFileName), - ); - - if (!toDownloadAsset) { - console.log(`Could not find engine file ${engineFileName}`); - exit(1); - } - - console.log(`Downloading engine file ${engineFileName}`); - const engineDir = resolve(this.rootDir(), 'cortex-cpp'); - if (existsSync(engineDir)) rmSync(engineDir, { recursive: true }); - - const download = await this.httpService - .get(toDownloadAsset.browser_download_url, { - responseType: 'stream', - }) - .toPromise(); - if (!download) { - console.log('Failed to download model'); - process.exit(1) - } - - const destination = resolve(this.rootDir(), toDownloadAsset.name); - - await new Promise((resolve, reject) => { - const writer = createWriteStream(destination); - let receivedBytes = 0; - const totalBytes = download.headers['content-length']; - - writer.on('finish', () => { - bar.stop(); - resolve(true); - }); - - writer.on('error', (error) => { - bar.stop(); - reject(error); - }); - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - - download.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - bar.update(Math.floor((receivedBytes / totalBytes) * 100)); - }); - - download.data.pipe(writer); - }); - - try { - await decompress( - resolve(this.rootDir(), destination), - resolve(this.rootDir()), - ); - } catch (e) { - console.log(e); - exit(1); - } - }; - - parseEngineFileName = (options: InitOptions) => { - const platform = - process.platform === 'win32' - ? 'windows' - : process.platform === 'darwin' - ? 'mac' - : process.platform; - const arch = process.arch === 'arm64' ? process.arch : 'amd64'; - const cudaVersion = - options.runMode === 'GPU' - ? options.gpuType === 'Nvidia' - ? '-cuda-' + (options.cudaVersion === '11' ? '11-7' : '12-0') - : '-vulkan' - : ''; - const instructions = options.instructions ? `-${options.instructions}` : ''; - const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`; - return `${engineName}.tar.gz`; - }; - - rootDir = () => resolve(__dirname, `../../../`); - - cudaVersion = async () => { - let filesCuda12: string[] - let filesCuda11: string[] - let paths: string[] - - if (process.platform === 'win32') { - filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll'] - filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll'] - paths = process.env.PATH ? process.env.PATH.split(delimiter) : [] - } else { - filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12'] - filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11'] - paths = process.env.LD_LIBRARY_PATH - ? process.env.LD_LIBRARY_PATH.split(delimiter) - : [] - paths.push('/usr/lib/x86_64-linux-gnu/') - } - - if (filesCuda12.every( - (file) => existsSync(file) || this.checkFileExistenceInPaths(file, paths) - )) return '12' - - - if (filesCuda11.every( - (file) => existsSync(file) || this.checkFileExistenceInPaths(file, paths) - )) return '11' - - return undefined // No CUDA Toolkit found + console.log(`Downloading engine file ${engineFileName}`); + const engineDir = resolve(this.rootDir(), 'cortex-cpp'); + if (existsSync(engineDir)) rmSync(engineDir, { recursive: true }); + + const download = await this.httpService + .get(toDownloadAsset.browser_download_url, { + responseType: 'stream', + }) + .toPromise(); + if (!download) { + console.log('Failed to download model'); + process.exit(1); + } + + const destination = resolve(this.rootDir(), toDownloadAsset.name); + + await new Promise((resolve, reject) => { + const writer = createWriteStream(destination); + let receivedBytes = 0; + const totalBytes = download.headers['content-length']; + + writer.on('finish', () => { + bar.stop(); + resolve(true); + }); + + writer.on('error', (error) => { + bar.stop(); + reject(error); + }); + + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + + download.data.on('data', (chunk: any) => { + receivedBytes += chunk.length; + bar.update(Math.floor((receivedBytes / totalBytes) * 100)); + }); + + download.data.pipe(writer); + }); + + try { + await decompress( + resolve(this.rootDir(), destination), + resolve(this.rootDir()), + ); + } catch (e) { + console.error('Error decompressing file', e); + exit(1); + } + }; + + parseEngineFileName = (options: InitOptions) => { + const platform = + process.platform === 'win32' + ? 'windows' + : process.platform === 'darwin' + ? 'mac' + : process.platform; + const arch = process.arch === 'arm64' ? process.arch : 'amd64'; + const cudaVersion = + options.runMode === 'GPU' + ? options.gpuType === 'Nvidia' + ? '-cuda-' + (options.cudaVersion === '11' ? '11-7' : '12-0') + : '-vulkan' + : ''; + const instructions = options.instructions ? `-${options.instructions}` : ''; + const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`; + return `${engineName}.tar.gz`; + }; + + rootDir = () => resolve(__dirname, `../../../../`); + + cudaVersion = async () => { + let filesCuda12: string[]; + let filesCuda11: string[]; + let paths: string[]; + + if (process.platform === 'win32') { + filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']; + filesCuda11 = [ + 'cublas64_11.dll', + 'cudart64_110.dll', + 'cublasLt64_11.dll', + ]; + paths = process.env.PATH ? process.env.PATH.split(delimiter) : []; + } else { + filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']; + filesCuda11 = [ + 'libcudart.so.11.0', + 'libcublas.so.11', + 'libcublasLt.so.11', + ]; + paths = process.env.LD_LIBRARY_PATH + ? process.env.LD_LIBRARY_PATH.split(delimiter) + : []; + paths.push('/usr/lib/x86_64-linux-gnu/'); } - checkFileExistenceInPaths = (file: string, paths: string[]): boolean => { - return paths.some((p) => existsSync(join(p, file))) + if ( + filesCuda12.every( + (file) => + existsSync(file) || this.checkFileExistenceInPaths(file, paths), + ) + ) + return '12'; + + if ( + filesCuda11.every( + (file) => + existsSync(file) || this.checkFileExistenceInPaths(file, paths), + ) + ) + return '11'; + + return undefined; // No CUDA Toolkit found + }; + + checkFileExistenceInPaths = (file: string, paths: string[]): boolean => { + return paths.some((p) => existsSync(join(p, file))); + }; + + installCudaToolkitDependency = async (options: InitOptions) => { + const platform = process.platform === 'win32' ? 'windows' : 'linux'; + + const url = this.CUDA_DOWNLOAD_URL.replace( + '', + options.cudaVersion === '11' ? '11.7' : '12.0', + ).replace('', platform); + const destination = resolve(this.rootDir(), 'cuda-toolkit.tar.gz'); + + const download = await this.httpService + .get(url, { + responseType: 'stream', + }) + .toPromise(); + + if (!download) { + console.log('Failed to download dependency'); + process.exit(1); } - installCudaToolkitDependency = async (options: InitOptions) => { - const platform = process.platform === 'win32' ? 'windows' : 'linux' - - const url = this.CUDA_DOWNLOAD_URL - .replace('', options.cudaVersion === '11' ? '11.7' : '12.0') - .replace('', platform) - const destination = resolve(this.rootDir(), 'cuda-toolkit.tar.gz'); - - const download = await this.httpService - .get(url, { - responseType: 'stream', - }) - .toPromise(); - - if (!download) { - console.log('Failed to download dependency'); - process.exit(1) - } - - await new Promise((resolve, reject) => { - const writer = createWriteStream(destination); - let receivedBytes = 0; - const totalBytes = download.headers['content-length']; - - writer.on('finish', () => { - bar.stop(); - resolve(true); - }); - - writer.on('error', (error) => { - bar.stop(); - reject(error); - }); - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - - download.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - bar.update(Math.floor((receivedBytes / totalBytes) * 100)); - }); - - download.data.pipe(writer); - }); - - try { - await decompress( - resolve(this.rootDir(), destination), - resolve(this.rootDir(), 'cortex-cpp'), - ); - } catch (e) { - console.log(e); - exit(1); - } + await new Promise((resolve, reject) => { + const writer = createWriteStream(destination); + let receivedBytes = 0; + const totalBytes = download.headers['content-length']; + + writer.on('finish', () => { + bar.stop(); + resolve(true); + }); + + writer.on('error', (error) => { + bar.stop(); + reject(error); + }); + + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + + download.data.on('data', (chunk: any) => { + receivedBytes += chunk.length; + bar.update(Math.floor((receivedBytes / totalBytes) * 100)); + }); + + download.data.pipe(writer); + }); + + try { + await decompress( + resolve(this.rootDir(), destination), + resolve(this.rootDir(), 'cortex-cpp'), + ); + } catch (e) { + console.log(e); + exit(1); } -} \ No newline at end of file + }; +} diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index cdb3110cf..2aa58047f 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -85,12 +85,15 @@ export class ModelsCliUsecases { type: 'list', name: 'quantization', message: 'Select quantization', - choices: data.siblings.map((e) => e.quantization).filter((e) => !!e), + // @ts-expect-error "quantization" won't be null + choices: data.siblings + .map((e) => e.quantization) + .filter((e) => e != null), }); - const sibling = data.siblings.filter((e) => !!e.quantization).find( - (e: any) => e.quantization === quantization, - ); + const sibling = data.siblings + .filter((e) => !!e.quantization) + .find((e: any) => e.quantization === quantization); if (!sibling) throw 'No expected quantization found'; const stopWords: string[] = []; diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts index 21fce643f..941f1b860 100644 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts @@ -24,7 +24,7 @@ export default class CortexProvider extends OAIEngineExtension { super(httpService); } - modelDir = () => resolve(__dirname, `../../../models`); + modelDir = () => resolve(__dirname, `../../../../models`); override async loadModel( model: Model, From e4ed6cda14f87751565182db9166da88e01073dd Mon Sep 17 00:00:00 2001 From: James Date: Fri, 17 May 2024 10:48:17 +0700 Subject: [PATCH 2/2] fix: get metadata for selected quantization only --- .../domain/models/huggingface.interface.ts | 1 - .../commanders/models/model-pull.command.ts | 8 +--- .../usecases/models.cli.usecases.ts | 39 ++++++++++--------- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/cortex-js/src/domain/models/huggingface.interface.ts b/cortex-js/src/domain/models/huggingface.interface.ts index 86cb6a955..cfb1dc5a3 100644 --- a/cortex-js/src/domain/models/huggingface.interface.ts +++ b/cortex-js/src/domain/models/huggingface.interface.ts @@ -17,7 +17,6 @@ export interface HuggingFaceRepoData { downloadUrl?: string; fileSize?: number; quantization?: Quantization; - stopWord?: string; }[]; createdAt: string; } diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index 4b8bc2952..c1a1af7ac 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -1,5 +1,4 @@ import { CommandRunner, SubCommand } from 'nest-commander'; -import { Presets, SingleBar } from 'cli-progress'; import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; @@ -19,12 +18,7 @@ export class ModelPullCommand extends CommandRunner { exit(1); } - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - const callback = (progress: number) => { - bar.update(progress); - }; - await this.modelsCliUsecases.pullModel(input[0], callback); + await this.modelsCliUsecases.pullModel(input[0]); console.log('\nDownload complete!'); exit(0); } diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 2aa58047f..68944fee5 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -6,6 +6,7 @@ import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface'; import { gguf } from '@huggingface/gguf'; import { InquirerService } from 'nest-commander'; import { Inject, Injectable } from '@nestjs/common'; +import { Presets, SingleBar } from 'cli-progress'; const AllQuantizations = [ 'Q3_K_S', @@ -71,11 +72,16 @@ export class ModelsCliUsecases { return this.modelsUsecases.remove(modelId); } - async pullModel(modelId: string, callback: (progress: number) => void) { + async pullModel(modelId: string) { if (modelId.includes('/')) { await this.pullHuggingFaceModel(modelId); } + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + const callback = (progress: number) => { + bar.update(progress); + }; await this.modelsUsecases.downloadModel(modelId, callback); } @@ -96,9 +102,21 @@ export class ModelsCliUsecases { .find((e: any) => e.quantization === quantization); if (!sibling) throw 'No expected quantization found'; + + let stopWord = ''; + try { + const { metadata } = await gguf(sibling.downloadUrl!); + // @ts-expect-error "tokenizer.ggml.eos_token_id" + const index = metadata['tokenizer.ggml.eos_token_id']; + // @ts-expect-error "tokenizer.ggml.tokens" + stopWord = metadata['tokenizer.ggml.tokens'][index] ?? ''; + } catch (err) { + console.log('Failed to get stop word: ', err); + } + const stopWords: string[] = []; - if (sibling.stopWord) { - stopWords.push(sibling.stopWord); + if (stopWord.length > 0) { + stopWords.push(stopWord); } const model: CreateModelDto = { @@ -149,21 +167,6 @@ export class ModelsCliUsecases { for (let i = 0; i < data.siblings.length; i++) { const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${data.siblings[i].rfilename}`; data.siblings[i].downloadUrl = downloadUrl; - - if (downloadUrl.endsWith('.gguf')) { - // getting stop word - let stopWord = ''; - try { - const { metadata } = await gguf(downloadUrl); - // @ts-expect-error "tokenizer.ggml.eos_token_id" - const index = metadata['tokenizer.ggml.eos_token_id']; - // @ts-expect-error "tokenizer.ggml.tokens" - stopWord = metadata['tokenizer.ggml.tokens'][index] ?? ''; - data.siblings[i].stopWord = stopWord; - } catch (err) { - console.log('Failed to get stop word: ', err); - } - } } AllQuantizations.forEach((quantization) => {