feat: add engine pull support for tensorrt-llm #765

Merged 5 commits on Jun 25, 2024
Changes from 1 commit
chore: update engines enum
louis-menlo committed Jun 25, 2024

commit f733db5c0be84dd25324e43247ef3bcfe07ed383
[diff: ModelStartCommand (file path not captured)]
@@ -13,6 +13,7 @@ import { InitCliUsecases } from '../usecases/init.cli.usecases';
import { existsSync } from 'node:fs';
import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service';
import { join } from 'node:path';
+ import { Engines } from '../types/engine.interface';

type ModelStartOptions = {
attach: boolean;
@@ -71,7 +72,7 @@ export class ModelStartCommand extends CommandRunner {
engine,
);
}
- if (engine === 'cortex.onnx' && process.platform !== 'win32') {
+ if (engine === Engines.onnx && process.platform !== 'win32') {
console.error('The ONNX engine does not support this OS yet.');
process.exit(1);
}
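
This guard (check the platform, log to stderr, exit non-zero) appears twice in the PR, here and in InitCliUsecases below. A minimal standalone sketch of the pattern, assuming the Engines enum introduced by this commit; the assertEngineSupported wrapper is illustrative, not part of the diff:

import { Engines } from '@/infrastructure/commanders/types/engine.interface';

// Illustrative wrapper around the guard used above: ONNX is Windows-only for now.
function assertEngineSupported(engine: string): void {
  if (engine === Engines.onnx && process.platform !== 'win32') {
    console.error('The ONNX engine does not support this OS yet.');
    process.exit(1);
  }
}

assertEngineSupported(Engines.llamaCPP); // passes on any OS
assertEngineSupported(Engines.onnx);     // exits with code 1 unless on Windows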
[diff: types/engine.interface.ts (new file; name inferred from the import paths in this PR)]
@@ -0,0 +1,5 @@
+ export enum Engines {
+   llamaCPP = 'cortex.llamacpp',
+   onnx = 'cortex.onnx',
+   tensorrtLLM = 'cortex.tensorrt-llm',
+ }
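
For orientation: Engines is a plain string enum, so each member is just its literal string at runtime and is assignable wherever a string is expected. A short sketch of those semantics, assuming the file resolves to the import path used later in this diff; the console checks are illustrative:

import { Engines } from '@/infrastructure/commanders/types/engine.interface';

// A string enum member compares equal to its literal value, which is
// why checks like `engine === Engines.onnx` work against the
// string-typed `engine` parameters elsewhere in this PR.
const engine: string = Engines.tensorrtLLM;
console.log(engine === 'cortex.tensorrt-llm'); // true

// String enums have no reverse mapping, so Object.values yields
// exactly the three engine identifiers.
console.log(Object.values(Engines));
// [ 'cortex.llamacpp', 'cortex.onnx', 'cortex.tensorrt-llm' ]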
[diff: InitCliUsecases (file path not captured)]
@@ -17,6 +17,7 @@ import {
CUDA_DOWNLOAD_URL,
} from '@/infrastructure/constants/cortex';
import { checkNvidiaGPUExist, cudaVersion } from '@/utils/cuda';
+ import { Engines } from '../types/engine.interface';

@Injectable()
export class InitCliUsecases {
@@ -70,7 +71,7 @@ export class InitCliUsecases {
)
await this.installLlamaCppEngine(options, version);

- if (engine === 'cortex.onnx' && process.platform !== 'win32') {
+ if (engine === Engines.onnx && process.platform !== 'win32') {
console.error('The ONNX engine does not support this OS yet.');
process.exit(1);
}
@@ -312,7 +313,7 @@ export class InitCliUsecases {
*/
private async installAcceleratedEngine(
version: string = 'latest',
- engine: string = 'cortex.onnx',
+ engine: string = Engines.onnx,
) {
const res = await firstValueFrom(
this.httpService.get(
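
One detail worth flagging in the hunk above: Engines.onnx can serve as the default for the string-typed engine parameter because string enum members are subtypes of string. A standalone sketch of the pattern; resolveEngine is an illustrative name, not a function in the codebase:

import { Engines } from '@/infrastructure/commanders/types/engine.interface';

// Mirrors the `engine: string = Engines.onnx` default parameter of
// installAcceleratedEngine above.
function resolveEngine(engine: string = Engines.onnx): string {
  return engine;
}

console.log(resolveEngine());                    // 'cortex.onnx'
console.log(resolveEngine(Engines.tensorrtLLM)); // 'cortex.tensorrt-llm'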
[diff: ModelsCliUsecases (file path not captured)]
@@ -16,12 +16,10 @@ import { join, basename } from 'path';
import { load } from 'js-yaml';
import { existsSync, readdirSync, readFileSync } from 'fs';
import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id';
- import {
-   fetchJanRepoData,
-   getHFModelMetadata,
- } from '@/utils/huggingface';
+ import { fetchJanRepoData, getHFModelMetadata } from '@/utils/huggingface';
import { createWriteStream, mkdirSync, promises } from 'node:fs';
import { firstValueFrom } from 'rxjs';
+ import { Engines } from '../types/engine.interface';

@Injectable()
export class ModelsCliUsecases {
@@ -271,7 +269,7 @@ export class ModelsCliUsecases {
// Default Model Settings
ctx_len: 4096,
ngl: 100,
- engine: modelId.includes('onnx') ? 'cortex.onnx' : 'cortex.llamacpp',
+ engine: Engines.llamaCPP,
};
if (!(await this.modelsUsecases.findOne(modelId)))
await this.modelsUsecases.create(model);
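
Note that this hunk changes behavior, not just notation: the old code inferred cortex.onnx from the model id, while the new code always defaults to Engines.llamaCPP at this call site, so ONNX models presumably receive their engine through another path. A before/after sketch of the resolution, with an illustrative modelId:

import { Engines } from '@/infrastructure/commanders/types/engine.interface';

const modelId = 'phi3:onnx'; // illustrative id containing 'onnx'

// Before this commit: the engine was inferred from the model id.
const before = modelId.includes('onnx') ? 'cortex.onnx' : 'cortex.llamacpp';
console.log(before); // 'cortex.onnx'

// After this commit: a single llama.cpp default at this call site.
const after: string = Engines.llamaCPP;
console.log(after); // 'cortex.llamacpp'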
[diff: ExtensionRepositoryImpl (file path not captured)]
@@ -7,6 +7,7 @@ import { EngineExtension } from '@/domain/abstracts/engine.abstract';
import { appPath } from '@/utils/app-path';
import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service';
import { existsSync } from 'fs';
+ import { Engines } from '@/infrastructure/commanders/types/engine.interface';

@Injectable()
export class ExtensionRepositoryImpl implements ExtensionRepository {
@@ -18,9 +19,9 @@ export class ExtensionRepositoryImpl implements ExtensionRepository {
private readonly cortexProvider: EngineExtension,
private readonly fileService: FileManagerService,
) {
- this.extensions.set('cortex.llamacpp', this.cortexProvider);
- this.extensions.set('cortex.onnx', this.cortexProvider);
- this.extensions.set('cortex.tensorrt-llm', this.cortexProvider);
+ this.extensions.set(Engines.llamaCPP, this.cortexProvider);
+ this.extensions.set(Engines.onnx, this.cortexProvider);
+ this.extensions.set(Engines.tensorrtLLM, this.cortexProvider);
this.loadCoreExtensions();
this.loadExternalExtensions();
}
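
Since all three engines map to the same cortexProvider, the enum keys here mainly ensure that registration and lookup agree on the exact strings. A sketch of the Map pattern in isolation; the provider object is a stand-in for the real EngineExtension:

import { Engines } from '@/infrastructure/commanders/types/engine.interface';

const cortexProvider = { name: 'cortex' }; // stand-in for EngineExtension
const extensions = new Map<string, typeof cortexProvider>();

// Same registration as in the constructor above.
for (const engine of [Engines.llamaCPP, Engines.onnx, Engines.tensorrtLLM]) {
  extensions.set(engine, cortexProvider);
}

// A lookup with a raw string still hits, because string enum members
// are plain strings at runtime.
console.log(extensions.get('cortex.tensorrt-llm') === cortexProvider); // true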
3 changes: 2 additions & 1 deletion cortex-js/src/usecases/models/models.usecases.ts
@@ -40,6 +40,7 @@ import { EventEmitter2 } from '@nestjs/event-emitter';
import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event';
import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service';
import { ContextService } from '@/infrastructure/services/context/context.service';
+ import { Engines } from '@/infrastructure/commanders/types/engine.interface';

@Injectable()
export class ModelsUsecases {
@@ -466,7 +467,7 @@ export class ModelsUsecases {
// Default Model Settings
ctx_len: 4096,
ngl: 100,
- engine: modelId.includes('onnx') ? 'cortex.onnx' : 'cortex.llamacpp',
+ engine: Engines.llamaCPP,
};
if (!(await this.findOne(modelId))) await this.create(model);
}