From 85c4f6953fdbc900e04728d2906f03b120a43281 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 10 Jun 2024 12:37:26 +0700 Subject: [PATCH] chore: update model start params DTO --- .../src/domain/models/model.interface.ts | 16 +----- .../commanders/models/model-start.command.ts | 5 +- .../commanders/shortcuts/run.command.ts | 10 ++-- .../controllers/models.controller.ts | 8 ++- .../dtos/models/create-model.dto.ts | 18 +++++- .../dtos/models/model-settings.dto.ts | 56 +++++++++++++++++++ .../repositories/model/model.repository.ts | 9 ++- .../src/usecases/cortex/cortex.usecases.ts | 5 +- .../src/usecases/messages/messages.module.ts | 3 +- .../src/usecases/models/models.usecases.ts | 10 +++- 10 files changed, 104 insertions(+), 36 deletions(-) create mode 100644 cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts index 3a3905935..e42ce2cee 100644 --- a/cortex-js/src/domain/models/model.interface.ts +++ b/cortex-js/src/domain/models/model.interface.ts @@ -81,7 +81,7 @@ export interface Model { ngl?: number; /** - * The number of parallel operations. Only set when enable continuous batching. + * Number of parallel sequences to decode */ n_parallel?: number; @@ -96,13 +96,6 @@ export interface Model { engine?: string; } -export interface ModelMetadata { - author: string; - tags: string[]; - size: number; - cover?: string; -} - /** * The available model settings. */ @@ -140,10 +133,3 @@ export interface ModelRuntimeParams { presence_penalty?: number; engine?: string; } - -/** - * Represents the model initialization error. - */ -export type ModelInitFailed = Model & { - error: Error; -}; diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index c4bd31278..d49977df6 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -7,7 +7,6 @@ import { import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { isLocalModel } from '../utils/normalize-model-id'; type ModelStartOptions = { attach: boolean; @@ -52,9 +51,7 @@ export class ModelStartCommand extends CommandRunner { } modelInquiry = async () => { - const models = (await this.modelsCliUsecases.listAllModels()).filter( - (model) => isLocalModel(model.files), - ); + const models = await this.modelsCliUsecases.listAllModels(); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index 118212590..e214e50b5 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -7,9 +7,11 @@ import { } from 'nest-commander'; import { exit } from 'node:process'; import { ChatCliUsecases } from '../usecases/chat.cli.usecases'; -import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex'; +import { + defaultCortexCppHost, + defaultCortexCppPort, +} from '@/infrastructure/constants/cortex'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; -import { isLocalModel } from '../utils/normalize-model-id'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; type RunOptions = { @@ -77,9 +79,7 @@ export class RunCommand extends CommandRunner { } modelInquiry = async () => { - const models = (await this.modelsCliUsecases.listAllModels()).filter( - (model) => isLocalModel(model.files), - ); + const models = await this.modelsCliUsecases.listAllModels(); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index b3f5b4ac5..bcd4ab272 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -20,6 +20,7 @@ import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { TransformInterceptor } from '../interceptors/transform.interceptor'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { ModelSettingsDto } from '../dtos/models/model-settings.dto'; @ApiTags('Models') @Controller('models') @@ -61,10 +62,13 @@ export class ModelsController { description: 'The unique identifier of the model.', }) @Post(':modelId(*)/start') - startModel(@Param('modelId') modelId: string, @Body() model: ModelDto) { + startModel( + @Param('modelId') modelId: string, + @Body() params: ModelSettingsDto, + ) { return this.cortexUsecases .startCortex() - .then(() => this.modelsUsecases.startModel(modelId, model)); + .then(() => this.modelsUsecases.startModel(modelId, params)); } @HttpCode(200) diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts index 09beb940b..4ff037171 100644 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts @@ -4,6 +4,7 @@ import { IsNumber, IsOptional, IsString, + Min, } from 'class-validator'; import { Model } from '@/domain/models/model.interface'; import { ModelArtifactDto } from './model-artifact.dto'; @@ -50,6 +51,7 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: 'Sets the upper limit on the number of tokens the model can generate in a single output.', + example: 4096, }) @IsOptional() @IsNumber() @@ -97,30 +99,40 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: 'Sets the maximum input the model can use to generate a response, it varies with the model used.', + example: 4096, }) @IsOptional() @IsNumber() ctx_len?: number; - @ApiProperty({ description: 'Determines GPU layer usage.' }) + @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 }) @IsOptional() @IsNumber() ngl?: number; - @ApiProperty({ description: 'Number of parallel processing units to use.' }) + @ApiProperty({ + description: 'Number of parallel processing units to use.', + example: 1, + }) @IsOptional() @IsNumber() + @Min(1) n_parallel?: number; @ApiProperty({ description: 'Determines CPU inference threads, limited by hardware and OS. ', + example: 10, }) @IsOptional() @IsNumber() + @Min(1) cpu_threads?: number; - @ApiProperty({ description: 'The engine used to run the model.' }) + @ApiProperty({ + description: 'The engine used to run the model.', + example: 'cortex.llamacpp', + }) @IsOptional() @IsString() engine?: string; diff --git a/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts new file mode 100644 index 000000000..32dffe469 --- /dev/null +++ b/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts @@ -0,0 +1,56 @@ +import { ModelSettingParams } from '@/domain/models/model.interface'; +import { ApiProperty } from '@nestjs/swagger'; +import { IsArray, IsNumber, IsOptional, Min } from 'class-validator'; + +export class ModelSettingsDto implements ModelSettingParams { + // Prompt Settings + @ApiProperty({ + example: 'system\n{system_message}\nuser\n{prompt}\nassistant', + description: + "A predefined text or framework that guides the AI model's response generation.", + }) + @IsOptional() + prompt_template?: string; + + @ApiProperty({ + type: [String], + example: [], + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + }) + @IsArray() + @IsOptional() + stop?: string[]; + + // Engine Settings + @ApiProperty({ description: 'Determines GPU layer usage.', example: 4096 }) + @IsOptional() + @IsNumber() + ngl?: number; + + @ApiProperty({ + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + example: 4096, + }) + @IsOptional() + @IsNumber() + ctx_len?: number; + + @ApiProperty({ + description: + 'Determines CPU inference threads, limited by hardware and OS. ', + example: 10, + }) + @IsOptional() + @IsNumber() + @Min(1) + cpu_threads?: number; + + @ApiProperty({ + example: 'cortex.llamacpp', + description: 'The engine to use.', + }) + @IsOptional() + engine?: string; +} diff --git a/cortex-js/src/infrastructure/repositories/model/model.repository.ts b/cortex-js/src/infrastructure/repositories/model/model.repository.ts index 6401c1ec7..78bb3c13d 100644 --- a/cortex-js/src/infrastructure/repositories/model/model.repository.ts +++ b/cortex-js/src/infrastructure/repositories/model/model.repository.ts @@ -12,7 +12,10 @@ import { writeFileSync, } from 'fs'; import { load, dump } from 'js-yaml'; -import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id'; +import { + isLocalModel, + normalizeModelId, +} from '@/infrastructure/commanders/utils/normalize-model-id'; @Injectable() export class ModelRepositoryImpl implements ModelRepository { @@ -58,7 +61,9 @@ export class ModelRepositoryImpl implements ModelRepository { * @returns the created model */ findAll(): Promise { - return this.loadModels(); + return this.loadModels().then((res) => + res.filter((model) => isLocalModel(model.files)), + ); } /** * Find one model by id diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index f867667f8..f7fbd5a6c 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -3,7 +3,10 @@ import { ChildProcess, spawn } from 'child_process'; import { join } from 'path'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; -import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex'; +import { + defaultCortexCppHost, + defaultCortexCppPort, +} from '@/infrastructure/constants/cortex'; import { existsSync } from 'node:fs'; import { firstValueFrom } from 'rxjs'; import { FileManagerService } from '@/file-manager/file-manager.service'; diff --git a/cortex-js/src/usecases/messages/messages.module.ts b/cortex-js/src/usecases/messages/messages.module.ts index cab6b863f..ab759dc81 100644 --- a/cortex-js/src/usecases/messages/messages.module.ts +++ b/cortex-js/src/usecases/messages/messages.module.ts @@ -1,11 +1,10 @@ import { Module } from '@nestjs/common'; import { MessagesUsecases } from './messages.usecases'; -import { MessagesController } from '@/infrastructure/controllers/messages.controller'; import { DatabaseModule } from '@/infrastructure/database/database.module'; @Module({ imports: [DatabaseModule], - controllers: [MessagesController], + controllers: [], providers: [MessagesUsecases], exports: [MessagesUsecases], }) diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index b7793d9d5..79af49f9f 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -69,7 +69,11 @@ export class ModelsUsecases { return this.modelRepository .remove(id) - .then(() => rmdirSync(modelFolder, { recursive: true })) + .then( + () => + existsSync(modelFolder) && + rmdirSync(modelFolder, { recursive: true }), + ) .then(() => { return { message: 'Model removed successfully', @@ -100,7 +104,9 @@ export class ModelsUsecases { // Default settings ctx_len: 4096, ngl: 100, - ...(Array.isArray(model?.files) && + //TODO: Utils for model file retrieval + ...(model?.files && + Array.isArray(model.files) && !('llama_model_path' in model) && { llama_model_path: (model.files as string[])[0], }),