diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts index b27888ea2..c4e1d2eda 100644 --- a/cortex-js/src/app.module.ts +++ b/cortex-js/src/app.module.ts @@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module'; import { ChatModule } from './usecases/chat/chat.module'; import { AssistantsModule } from './usecases/assistants/assistants.module'; import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; +import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module'; import { CortexModule } from './usecases/cortex/cortex.module'; import { ConfigModule } from '@nestjs/config'; import { env } from 'node:process'; @@ -31,6 +32,7 @@ import { FileManagerModule } from './file-manager/file-manager.module'; CortexModule, ExtensionModule, FileManagerModule, + ModelRepositoryModule, ], providers: [SeedService], }) diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts index 6f8a834c9..3a3905935 100644 --- a/cortex-js/src/domain/models/model.interface.ts +++ b/cortex-js/src/domain/models/model.interface.ts @@ -1,21 +1,6 @@ -/** - * Represents the information about a model. - * @stored - */ -export interface ModelInfo { - id: string; - settings: ModelSettingParams; - parameters: ModelRuntimeParams; - engine?: string; -} - export interface ModelArtifact { - url: string; -} - -export enum ModelFormat { - GGUF = 'gguf', - API = 'api', + mmproj?: string; + llama_model_path?: string; } /** @@ -24,64 +9,91 @@ export enum ModelFormat { */ export interface Model { /** - * The type of the object. - * Default: "model" + * Model identifier. */ - object: string; + model: string; /** - * The version of the model. + * GGUF metadata: general.name */ - version: string; + name?: string; /** - * The format of the model. + * GGUF metadata: version */ - format: ModelFormat; + version?: string; /** * The model download source. It can be an external url or a local filepath. */ - sources: ModelArtifact[]; + files: string[] | ModelArtifact; + + /** + * GGUF metadata: tokenizer.chat_template + */ + prompt_template?: string; + + /** + * Defines specific tokens or phrases at which the model will stop generating further output. + */ + stop?: string[]; + + /// Inferencing + /** + * Set probability threshold for more relevant outputs. + */ + top_p?: number; /** - * The model identifier, which can be referenced in the API endpoints. + * Controls the randomness of the model’s output. */ - id: string; + temperature?: number; /** - * Human-readable name that is used for UI. + * Adjusts the likelihood of the model repeating words or phrases in its output. */ - name: string; + frequency_penalty?: number; /** - * The Unix timestamp (in seconds) for when the model was created + * Influences the generation of new and varied concepts in the model’s output. */ - created: number; + presence_penalty?: number; + /// Engines /** - * Default: "A cool model from Huggingface" + * The context length for model operations varies; the maximum depends on the specific model used. */ - description: string; + ctx_len?: number; /** - * The model settings. + * Enable real-time data processing for faster predictions. */ - settings: ModelSettingParams; + stream?: boolean; + + /* + * The maximum number of tokens the model will generate in a single response. + */ + max_tokens?: number; /** - * The model runtime parameters. + * The number of layers to load onto the GPU for acceleration. 
*/ - parameters: ModelRuntimeParams; + ngl?: number; /** - * Metadata of the model. + * The number of parallel operations. Only set when continuous batching is enabled. */ - metadata: ModelMetadata; + n_parallel?: number; + + /** + * Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system) + */ + cpu_threads?: number; + /** * The model engine. */ - engine: string; + engine?: string; } export interface ModelMetadata { @@ -109,6 +121,8 @@ export interface ModelSettingParams { cont_batching?: boolean; vision_model?: boolean; text_model?: boolean; + engine?: string; + stop?: string[]; } /** @@ -133,8 +147,3 @@ export interface ModelRuntimeParams { export type ModelInitFailed = Model & { error: Error; }; - -export interface NitroModelSettings extends ModelSettingParams { - llama_model_path: string; - cpu_threads: number; -} diff --git a/cortex-js/src/domain/models/thread.interface.ts b/cortex-js/src/domain/models/thread.interface.ts index 21a481aea..bdfb67b32 100644 --- a/cortex-js/src/domain/models/thread.interface.ts +++ b/cortex-js/src/domain/models/thread.interface.ts @@ -1,5 +1,5 @@ import { AssistantTool } from './assistant.interface'; -import { ModelInfo } from './model.interface'; +import { Model } from './model.interface'; export interface Thread { /** Unique identifier for the thread, generated by default using the ULID method. **/ @@ -40,7 +40,7 @@ export interface ThreadMetadata { export interface ThreadAssistantInfo { assistant_id: string; assistant_name: string; - model: ModelInfo; + model: Partial<Model>; instructions?: string; tools?: AssistantTool[]; } diff --git a/cortex-js/src/domain/repositories/model.interface.ts b/cortex-js/src/domain/repositories/model.interface.ts new file mode 100644 index 000000000..1d8f3ddac --- /dev/null +++ b/cortex-js/src/domain/repositories/model.interface.ts @@ -0,0 +1,4 @@ +import { Model } from '../models/model.interface'; +import { Repository } from './repository.interface'; + +export abstract class ModelRepository extends Repository<Model> {} diff --git a/cortex-js/src/file-manager/file-manager.service.ts b/cortex-js/src/file-manager/file-manager.service.ts index c47b876d0..62431b98b 100644 --- a/cortex-js/src/file-manager/file-manager.service.ts +++ b/cortex-js/src/file-manager/file-manager.service.ts @@ -10,8 +10,13 @@ export class FileManagerService { private configFile = '.cortexrc'; private cortexDirectoryName = 'cortex'; private modelFolderName = 'models'; + private extensionFoldername = 'extensions'; private cortexCppFolderName = 'cortex-cpp'; + /** + * Get cortex configs + * @returns the config object + */ async getConfig(): Promise<Config> { const homeDir = os.homedir(); const configPath = join(homeDir, this.configFile); @@ -71,8 +76,33 @@ export class FileManagerService { }; } + /** + * Get the app data folder path + * Usually it is located at the home directory > cortex + * @returns the path to the data folder + */ async getDataFolderPath(): Promise<string> { const config = await this.getConfig(); return config.dataFolderPath; } + + /** + * Get the models data folder path + * Usually it is located at the home directory > cortex > models + * @returns the path to the models folder + */ + async getModelsPath(): Promise<string> { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.modelFolderName); + } + + /** + * Get the extensions data folder path + * Usually it is located at the home directory > cortex > extensions + * @returns the path to the extensions folder + */ + async getExtensionsPath(): Promise<string> { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.extensionFoldername); + } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts index 15136adc6..0ca0f7142 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts @@ -14,7 +14,8 @@ export class ModelGetCommand extends CommandRunner { exit(1); } - const models = await this.modelsCliUsecases.getModel(input[0]); - console.log(models); + const model = await this.modelsCliUsecases.getModel(input[0]); + if (!model) console.error('Model not found'); + else console.log(model); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts index a32c609cd..90a914963 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts @@ -15,10 +15,9 @@ export class ModelListCommand extends CommandRunner { option.format === 'table' ? console.table( models.map((e) => ({ - id: e.id, + id: e.model, engine: e.engine, - format: e.format, - created: e.created, + version: e.version, })), ) : console.log(models); diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index f8b6891b9..7793cf13c 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -2,7 +2,7 @@ import { CommandRunner, InquirerService, SubCommand } from 'nest-commander'; import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { RepoDesignation, listFiles } from '@huggingface/hub'; -import { basename } from 'node:path'; +import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; @SubCommand({ name: 'pull', @@ -29,12 +29,16 @@ export class ModelPullCommand extends CommandRunner { ? undefined : await this.tryToGetBranches(input[0]); - if (!branches) { - await this.modelsCliUsecases.pullModel(input[0]); - } else { - // if there's metadata.yaml file, we assumed it's a JanHQ model - await this.handleJanHqModel(input[0], branches); - } + await this.modelsCliUsecases + .pullModel( + !branches ? input[0] : await this.handleJanHqModel(input[0], branches), + ) + .catch((e: Error) => { + if (e instanceof ModelNotFoundException) + console.error('Model does not exist.'); + else console.error(e); + exit(1); + }); console.log('\nDownload complete!'); exit(0); @@ -83,10 +87,6 @@ export class ModelPullCommand extends CommandRunner { } private async handleJanHqModel(repoName: string, branches: string[]) { - const sanitizedRepoName = repoName.trim().startsWith(this.janHqModelPrefix) ?
repoName - : `${this.janHqModelPrefix}/${repoName}`; - let selectedTag = branches[0]; if (branches.length > 1) { @@ -98,30 +98,7 @@ export class ModelPullCommand extends CommandRunner { console.error("Can't find model revision."); exit(1); } - - const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName }; - let ggufUrl: string | undefined = undefined; - let fileSize = 0; - for await (const fileInfo of listFiles({ - repo: repo, - revision: revision, - })) { - if (fileInfo.path.endsWith('.gguf')) { - ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`; - fileSize = fileInfo.size; - break; - } - } - - if (!ggufUrl) { - console.error("Can't find model file."); - exit(1); - } - console.log('Downloading', basename(ggufUrl)); - await this.modelsCliUsecases.pullModelWithExactUrl( - `${sanitizedRepoName}/${revision}`, - ggufUrl, - fileSize, - ); + // Return parsed model Id + return `${repoName}:${revision}`; } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index 3fcbd82e8..be91c3fe0 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -33,6 +33,16 @@ export class ModelStartCommand extends CommandRunner { } } + const existingModel = await this.modelsCliUsecases.getModel(modelId); + if ( + !existingModel || + !Array.isArray(existingModel.files) || + /^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0]) + ) { + console.error('Model is not available. Please pull the model first.'); + process.exit(1); + } + await this.cortexUsecases .startCortex(options.attach) .then(() => this.modelsCliUsecases.startModel(modelId, options.preset)) @@ -41,7 +51,11 @@ export class ModelStartCommand extends CommandRunner { } modelInquiry = async () => { - const models = await this.modelsCliUsecases.listAllModels(); + const models = (await this.modelsCliUsecases.listAllModels()).filter( + (model) => + Array.isArray(model.files) && + !/^(http|https):\/\/[^/]+\/.*/.test(model.files[0]), + ); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', @@ -49,7 +63,7 @@ export class ModelStartCommand extends CommandRunner { message: 'Select a model to start:', choices: models.map((e) => ({ name: e.name, - value: e.id, + value: e.model, })), }); return model; diff --git a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts index 6f583e64c..7f9c6b0cd 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts @@ -1,11 +1,7 @@ import { CommandRunner, SubCommand, Option } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { exit } from 'node:process'; -import { ModelParameterParser } from '../utils/model-parameter.parser'; -import { - ModelRuntimeParams, - ModelSettingParams, -} from '@/domain/models/model.interface'; +import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; type UpdateOptions = { model?: string; @@ -31,42 +27,13 @@ export class ModelUpdateCommand extends CommandRunner { exit(0); } - const parser = new ModelParameterParser(); - const settingParams: ModelSettingParams = {}; - const runtimeParams: ModelRuntimeParams = {}; + 
const toUpdate: UpdateModelDto = {}; options.forEach((option) => { const [key, stringValue] = option.split('='); - if (parser.isModelSettingParam(key)) { - const value = parser.parse(key, stringValue); - // @ts-expect-error did the check so it's safe - settingParams[key] = value; - } else if (parser.isModelRuntimeParam(key)) { - const value = parser.parse(key, stringValue); - // @ts-expect-error did the check so it's safe - runtimeParams[key] = value; - } + Object.assign(toUpdate, { [key]: stringValue }); }); - - if (Object.keys(settingParams).length > 0) { - const updatedSettingParams = - await this.modelsCliUsecases.updateModelSettingParams( - modelId, - settingParams, - ); - console.log( - 'Updated setting params! New setting params:', - updatedSettingParams, - ); - } - - if (Object.keys(runtimeParams).length > 0) { - await this.modelsCliUsecases.updateModelRuntimeParams( - modelId, - runtimeParams, - ); - console.log('Updated runtime params! New runtime params:', runtimeParams); - } + this.modelsCliUsecases.updateModel(modelId, toUpdate); } @Option({ diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index 464818b66..8cbe6f983 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -63,7 +63,11 @@ export class RunCommand extends CommandRunner { } modelInquiry = async () => { - const models = await this.modelsCliUsecases.listAllModels(); + const models = (await this.modelsCliUsecases.listAllModels()).filter( + (model) => + Array.isArray(model.files) && + !/^(http|https):\/\/[^/]+\/.*/.test(model.files[0]), + ); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', @@ -71,7 +75,7 @@ message: 'Select a model to start:', choices: models.map((e) => ({ name: e.name, - value: e.id, + value: e.model, })), }); return model; diff --git a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts index 2ef890d49..9347137de 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts @@ -15,11 +15,11 @@ import { Thread } from '@/domain/models/thread.interface'; import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto'; import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases'; import { CreateThreadAssistantDto } from '@/infrastructure/dtos/threads/create-thread-assistant.dto'; -import { CreateThreadModelInfoDto } from '@/infrastructure/dtos/threads/create-thread-model-info.dto'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; import stream from 'stream'; import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto'; import { MessagesUsecases } from '@/usecases/messages/messages.usecases'; +import { ModelParameterParser } from '../utils/model-parameter.parser'; @Injectable() export class ChatCliUsecases { @@ -68,12 +68,14 @@ export class ChatCliUsecases { rl.on('line', sendCompletionMessage.bind(this)); - function sendCompletionMessage(userInput: string) { + async function sendCompletionMessage(userInput: string) { if (userInput.trim() === this.exitClause) { rl.close(); return; } + const model = await
this.modelsUsecases.findOne(modelId); + messages.push({ content: userInput, role: ChatCompletionRole.User, @@ -95,23 +97,64 @@ export class ChatCliUsecases { }; this.messagesUsecases.create(createMessageDto); + const parser = new ModelParameterParser(); const chatDto: CreateChatCompletionDto = { + // Default results params messages, model: modelId, stream: true, - max_tokens: 2048, + max_tokens: 4098, stop: [], frequency_penalty: 0.7, presence_penalty: 0.7, temperature: 0.7, top_p: 0.7, + + // Override with model settings + ...parser.parseModelInferenceParams(model), }; const decoder = new TextDecoder('utf-8'); this.chatUsecases .inference(chatDto, {}) + .then((response: stream.Readable) => { + // None streaming - json object response + if (!chatDto.stream) { + const objectData = response as any; + const assistantResponse = + objectData.choices[0]?.message?.content ?? ''; + + stdout.write(assistantResponse); + messages.push({ + content: assistantResponse, + role: ChatCompletionRole.Assistant, + }); + + const createMessageDto: CreateMessageDto = { + thread_id: thread.id, + role: ChatCompletionRole.Assistant, + content: [ + { + type: ContentType.Text, + text: { + value: assistantResponse, + annotations: [], + }, + }, + ], + status: MessageStatus.Ready, + }; + + this.messagesUsecases.create(createMessageDto).then(() => { + console.log('\n'); + if (attach) rl.prompt(); + else rl.close(); + }); + return; + } + // Streaming let assistantResponse: string = ''; response.on('error', (error: any) => { @@ -194,16 +237,10 @@ export class ChatCliUsecases { const assistant = await this.assistantUsecases.findOne('jan'); if (!assistant) throw new Error('No assistant available'); - const createThreadModel: CreateThreadModelInfoDto = { - id: modelId, - settings: model.settings, - parameters: model.parameters, - }; - const assistantDto: CreateThreadAssistantDto = { assistant_id: assistant.id, assistant_name: assistant.name, - model: createThreadModel, + model: model, }; const createThreadDto: CreateThreadDto = { diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index abbf95c21..9e03b9723 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -1,11 +1,6 @@ import { exit } from 'node:process'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { - Model, - ModelFormat, - ModelRuntimeParams, - ModelSettingParams, -} from '@/domain/models/model.interface'; +import { Model } from '@/domain/models/model.interface'; import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface'; import { gguf } from '@huggingface/gguf'; @@ -25,10 +20,12 @@ import { ModelTokenizer } from '../types/model-tokenizer.interface'; import { HttpService } from '@nestjs/axios'; import { firstValueFrom } from 'rxjs'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; +import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { FileManagerService } from '@/file-manager/file-manager.service'; -import { join } from 'path'; +import { join, basename } from 'path'; import { load } from 'js-yaml'; -import { existsSync, readFileSync } from 'node:fs'; +import { existsSync, readFileSync } from 'fs'; +import { normalizeModelId } from 
'../utils/normalize-model-id'; const AllQuantizations = [ 'Q3_K_S', @@ -60,7 +57,7 @@ export class ModelsCliUsecases { @Inject(InquirerService) private readonly inquirerService: InquirerService, private readonly httpService: HttpService, - private readonly fileManagerService: FileManagerService, + private readonly fileService: FileManagerService, ) {} /** @@ -74,7 +71,7 @@ const parsedPreset = await this.parsePreset(preset); return this.getModelOrStop(modelId) .then((model) => ({ - ...model.settings, + ...model, ...parsedPreset, })) .then((settings) => this.modelsUsecases.startModel(modelId, settings)) @@ -97,29 +94,10 @@ } /** - * Update model's settings. E.g. ngl, prompt_template, etc. - * @param modelId - * @param settingParams - * @returns + * Update a model by ID with new data */ - async updateModelSettingParams( - modelId: string, - settingParams: ModelSettingParams, - ): Promise<ModelSettingParams> { - return this.modelsUsecases.updateModelSettingParams(modelId, settingParams); - } - - /** - * Update model's runtime parameters. E.g. max_tokens, temperature, etc. - * @param modelId - * @param runtimeParams - * @returns - */ - async updateModelRuntimeParams( - modelId: string, - runtimeParams: ModelRuntimeParams, - ): Promise<ModelRuntimeParams> { - return this.modelsUsecases.updateModelRuntimeParams(modelId, runtimeParams); + async updateModel(modelId: string, toUpdate: UpdateModelDto) { + return this.modelsUsecases.update(modelId, toUpdate); } /** @@ -127,7 +105,7 @@ * @param modelId * @returns */ - private async getModelOrStop(modelId: string): Promise<Model> { + async getModelOrStop(modelId: string): Promise<Model> { const model = await this.modelsUsecases.findOne(modelId); if (!model) { console.debug('Model not found'); @@ -149,9 +127,8 @@ * @param modelId * @returns */ - async getModel(modelId: string): Promise<Model> { - const model = await this.getModelOrStop(modelId); - return model; + async getModel(modelId: string): Promise<Model | null> { + return this.modelsUsecases.findOne(modelId); } /** @@ -164,52 +141,21 @@ return this.modelsUsecases.remove(modelId); } - async pullModelWithExactUrl(modelId: string, url: string, fileSize: number) { - const tokenizer = await this.getHFModelTokenizer(url); - const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2; - const stopWords: string[] = [tokenizer?.stopWord ?? '']; - - const model: CreateModelDto = { - sources: [ - { - url: url, - }, - ], - id: modelId, - name: modelId, - version: '1.0.0', - format: ModelFormat.GGUF, - description: '', - settings: { - prompt_template: promptTemplate, - }, - parameters: { - stop: stopWords, - }, - metadata: { - author: 'janhq', - size: fileSize, - tags: [], - }, - engine: 'cortex', - }; - if (!(await this.modelsUsecases.findOne(modelId))) { - await this.modelsUsecases.create(model); - } - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - const callback = (progress: number) => { - bar.update(progress); - }; - await this.modelsUsecases.downloadModel(modelId, callback); - } - /** * Pull model from Model repository (HF, Jan...)
* @param modelId */ async pullModel(modelId: string) { + const existingModel = await this.modelsUsecases.findOne(modelId); + if ( + existingModel && + Array.isArray(existingModel.files) && + !/^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0]) + ) { + console.error('Model already exists'); + process.exit(1); + } + if (modelId.includes('/') || modelId.includes(':')) { await this.pullHuggingFaceModel(modelId); } @@ -218,7 +164,21 @@ export class ModelsCliUsecases { const callback = (progress: number) => { bar.update(progress); }; - await this.modelsUsecases.downloadModel(modelId, callback); + + try { + await this.modelsUsecases.downloadModel(modelId, callback); + + const model = await this.modelsUsecases.findOne(modelId); + const fileUrl = join( + await this.fileService.getModelsPath(), + normalizeModelId(modelId), + basename((model?.files as string[])[0]), + ); + await this.modelsUsecases.update(modelId, { files: [fileUrl] }); + } catch (err) { + bar.stop(); + throw err; + } } private async getHFModelTokenizer( @@ -289,29 +249,24 @@ export class ModelsCliUsecases { const stopWords: string[] = [tokenizer?.stopWord ?? '']; const model: CreateModelDto = { - sources: [ - { - url: sibling?.downloadUrl ?? '', - }, - ], - id: modelId, + files: [sibling.downloadUrl ?? ''], + model: modelId, name: modelId, - version: '', - format: ModelFormat.GGUF, - description: '', - settings: { - prompt_template: promptTemplate, - llama_model_path: sibling.rfilename, - }, - parameters: { - stop: stopWords, - }, - metadata: { - author: data.author, - size: sibling.fileSize ?? 0, - tags: [], - }, - engine: 'cortex', + prompt_template: promptTemplate, + stop: stopWords, + + // Default Inference Params + stream: true, + max_tokens: 4098, + frequency_penalty: 0.7, + presence_penalty: 0.7, + temperature: 0.7, + top_p: 0.7, + + // Default Model Settings + ctx_len: 4096, + ngl: 100, + engine: 'cortex.llamacpp', }; if (!(await this.modelsUsecases.findOne(modelId))) await this.modelsUsecases.create(model); @@ -456,7 +411,7 @@ export class ModelsCliUsecases { private async parsePreset(preset?: string): Promise { const presetPath = join( - await this.fileManagerService.getDataFolderPath(), + await this.fileService.getDataFolderPath(), 'presets', `${preset}.yaml`, ); diff --git a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts index 81f2fe8b4..51e097e3e 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts @@ -40,7 +40,7 @@ export class PSCliUsecases { currentTime.getTime() - new Date(startTime).getTime(); return { modelId: e.id, - engine: e.engine ?? 'llama.cpp', // TODO: get engine from model when it's ready + engine: e.engine ?? 'cortex.llamacpp', status: 'running', duration: this.formatDuration(duration), ram: e.ram ?? 
'-', diff --git a/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts index c8ca62650..7ea02e847 100644 --- a/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts +++ b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts @@ -1,25 +1,24 @@ +import { + Model, + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; + // Make this class injectable export class ModelParameterParser { private modelSettingParamTypes: { [key: string]: string } = { + prompt_template: 'string', ctx_len: 'number', ngl: 'number', - embedding: 'boolean', n_parallel: 'number', cpu_threads: 'number', - prompt_template: 'string', - system_prompt: 'string', - ai_prompt: 'string', - user_prompt: 'string', llama_model_path: 'string', mmproj: 'string', cont_batching: 'boolean', - vision_model: 'boolean', - text_model: 'boolean', }; private modelRuntimeParamTypes: { [key: string]: string } = { temperature: 'number', - token_limit: 'number', top_k: 'number', top_p: 'number', stream: 'boolean', @@ -29,105 +28,44 @@ export class ModelParameterParser { presence_penalty: 'number', }; - isModelSettingParam(key: string): boolean { - return key in this.modelSettingParamTypes; - } - - isModelRuntimeParam(key: string): boolean { - return key in this.modelRuntimeParamTypes; - } - - parse(key: string, value: string): boolean | number | string | string[] { - if (this.isModelSettingParam(key)) { - return this.parseModelSettingParams(key, value); - } - - if (this.isModelRuntimeParam(key)) { - return this.parseModelRuntimeParams(key, value); - } - - throw new Error(`Invalid setting key: ${key}`); - } - - private parseModelSettingParams( - key: string, - value: string, - ): boolean | number | string | string[] { - const settingType = this.modelSettingParamTypes[key]; - if (!settingType) { - throw new Error(`Invalid setting key: ${key}`); - } - - switch (settingType) { - case 'string': - return value; - - case 'number': - return this.toNumber(value); - - case 'string[]': - return this.toStringArray(value); - - case 'boolean': - return this.toBoolean(value); - - default: - throw new Error('Invalid setting type'); - } + /** + * Parse the model inference parameters from origin Model + * @param model + * @returns Partial + */ + parseModelInferenceParams(model: Partial): Partial { + const inferenceParams: Partial & ModelRuntimeParams = + structuredClone(model); + return Object.keys(inferenceParams).reduce((acc, key) => { + if (!this.isModelRuntimeParam(key)) { + delete acc[key as keyof typeof acc]; + } + + return acc; + }, inferenceParams); } - - private parseModelRuntimeParams( - key: string, - value: string, - ): boolean | number | string | string[] { - const settingType = this.modelRuntimeParamTypes[key]; - if (!settingType) { - throw new Error(`Invalid setting key: ${key}`); - } - - switch (settingType) { - case 'string': - return value; - - case 'number': - return this.toNumber(value); - - case 'string[]': - return this.toStringArray(value); - - case 'boolean': - return this.toBoolean(value); - - default: - throw new Error('Invalid setting type'); - } - } - - private toNumber(str: string): number { - const num = parseFloat(str.trim()); - if (isNaN(num)) { - throw new Error(`Invalid number value: ${str}`); - } - return num; + /** + * Parse the model engine settings from origin Model + * @param model + * @returns Partial + */ + parseModelEngineSettings(model: Partial): Partial { + 
const engineSettings: Partial & ModelSettingParams = + structuredClone(model); + return Object.keys(engineSettings).reduce((acc, key) => { + if (!this.isModelSettingParam(key)) { + delete acc[key as keyof typeof acc]; + } + + return acc; + }, engineSettings); } - private toStringArray(str: string, delimiter: string = ','): string[] { - return str.split(delimiter).map((s) => s.trim()); + private isModelSettingParam(key: string): boolean { + return key in this.modelSettingParamTypes; } - private toBoolean(str: string): boolean { - const normalizedStr = str.trim().toLowerCase(); - switch (normalizedStr) { - case '1': - case 'true': - return true; - - case '0': - case 'false': - return false; - - default: - throw new Error(`Invalid boolean value: ${str}`); - } + private isModelRuntimeParam(key: string): boolean { + return key in this.modelRuntimeParamTypes; } } diff --git a/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts b/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts index c36cb339e..bd54f1bd9 100644 --- a/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts +++ b/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts @@ -1,3 +1,3 @@ export const normalizeModelId = (modelId: string): string => { - return modelId.replace(':', '%3A'); + return modelId.replace(':', '-').replace('/', '-'); }; diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index bb1d415b1..5d6340e1a 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -18,7 +18,6 @@ import { DeleteModelResponseDto } from '@/infrastructure/dtos/models/delete-mode import { DownloadModelResponseDto } from '@/infrastructure/dtos/models/download-model.dto'; import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; -import { ModelSettingParamsDto } from '../dtos/models/model-setting-params.dto'; import { TransformInterceptor } from '../interceptors/transform.interceptor'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; @@ -62,13 +61,10 @@ export class ModelsController { description: 'The unique identifier of the model.', }) @Post(':modelId(*)/start') - startModel( - @Param('modelId') modelId: string, - @Body() settings: ModelSettingParamsDto, - ) { + startModel(@Param('modelId') modelId: string, @Body() model: ModelDto) { return this.cortexUsecases .startCortex() - .then(() => this.modelsUsecases.startModel(modelId, settings)); + .then(() => this.modelsUsecases.startModel(modelId, model)); } @HttpCode(200) diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts index acc3fd9ab..09beb940b 100644 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts @@ -1,55 +1,127 @@ -import { Type } from 'class-transformer'; -import { IsArray, IsEnum, IsString, ValidateNested } from 'class-validator'; -import { Model, ModelFormat } from '@/domain/models/model.interface'; +import { + IsArray, + IsBoolean, + IsNumber, + IsOptional, + IsString, +} from 'class-validator'; +import { Model } from '@/domain/models/model.interface'; import { ModelArtifactDto } from './model-artifact.dto'; -import { ModelSettingParamsDto } from 
'./model-setting-params.dto'; -import { ModelRuntimeParamsDto } from './model-runtime-params.dto'; -import { ModelMetadataDto } from './model-metadata.dto'; -import { ApiProperty } from '@nestjs/swagger'; +import { ApiProperty, getSchemaPath } from '@nestjs/swagger'; export class CreateModelDto implements Partial { - @ApiProperty({ description: 'The version of the model.' }) + // Cortex Meta + @ApiProperty({ description: 'The unique identifier of the model.' }) @IsString() - version: string; + model: string; - @ApiProperty({ description: 'The state format of the model.' }) - @IsEnum(ModelFormat) - format: ModelFormat; + @ApiProperty({ description: 'The name of the model.' }) + @IsString() + name?: string; @ApiProperty({ description: 'The URL sources from which the model downloaded or accessed.', + oneOf: [ + { type: 'array', items: { type: 'string' } }, + { $ref: getSchemaPath(ModelArtifactDto) }, + ], }) @IsArray() - @ValidateNested({ each: true }) - @Type(() => ModelArtifactDto) - sources: ModelArtifactDto[]; + files: string[] | ModelArtifactDto; - @ApiProperty({ description: 'The unique identifier of the model.' }) + // Model Input / Output Syntax + @ApiProperty({ + description: + "A predefined text or framework that guides the AI model's response generation.", + }) + @IsOptional() @IsString() - id: string; + prompt_template?: string; - @ApiProperty({ description: 'The name of the model.' }) - @IsString() - name: string; + @ApiProperty({ + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + }) + @IsOptional() + @IsArray() + stop?: string[]; - @ApiProperty({ description: 'A brief description of the model.' }) - @IsString() - description: string; + // Results Preferences + @ApiProperty({ + description: + 'Sets the upper limit on the number of tokens the model can generate in a single output.', + }) + @IsOptional() + @IsNumber() + max_tokens?: number; + + @ApiProperty({ + description: 'Sets probability threshold for more relevant outputs.', + }) + @IsOptional() + @IsNumber() + top_p?: number; - @ApiProperty({ description: 'The settings parameters of the model.' }) - @ValidateNested({ always: true, each: true }) - @Type(() => ModelSettingParamsDto) - settings: ModelSettingParamsDto; + @ApiProperty({ + description: "Influences the randomness of the model's output.", + }) + @IsOptional() + @IsNumber() + temperature?: number; - @ApiProperty({ description: 'The parameters configuration of the model.' }) - @ValidateNested() - parameters: ModelRuntimeParamsDto; + @ApiProperty({ + description: + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', + }) + @IsOptional() + @IsNumber() + frequency_penalty?: number; - @ApiProperty({ description: 'The metadata of the model.' }) - @ValidateNested() - metadata: ModelMetadataDto; + @ApiProperty({ + description: + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + }) + @IsOptional() + @IsNumber() + presence_penalty?: number; + + @ApiProperty({ + description: + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.', + }) + @IsOptional() + @IsBoolean() + stream?: boolean; + + // Engine Settings + @ApiProperty({ + description: + 'Sets the maximum input the model can use to generate a response, it varies with the model used.', + }) + @IsOptional() + @IsNumber() + ctx_len?: number; + + @ApiProperty({ description: 'Determines GPU layer usage.' }) + @IsOptional() + @IsNumber() + ngl?: number; + + @ApiProperty({ description: 'Number of parallel processing units to use.' }) + @IsOptional() + @IsNumber() + n_parallel?: number; + + @ApiProperty({ + description: + 'Determines CPU inference threads, limited by hardware and OS. ', + }) + @IsOptional() + @IsNumber() + cpu_threads?: number; @ApiProperty({ description: 'The engine used to run the model.' }) + @IsOptional() @IsString() - engine: string; + engine?: string; } diff --git a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts index d1b2cf402..36b72b92e 100644 --- a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts @@ -3,7 +3,10 @@ import { ModelArtifact } from '@/domain/models/model.interface'; import { ApiProperty } from '@nestjs/swagger'; export class ModelArtifactDto implements ModelArtifact { - @ApiProperty({ description: 'The URL source of the model.' }) + @ApiProperty({ description: 'The mmproj bin file url.' }) @IsString() - url: string; + mmproj?: string; + @ApiProperty({ description: 'The llama model bin file url.' }) + @IsString() + llama_model_path?: string; } diff --git a/cortex-js/src/infrastructure/dtos/models/model-metadata.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-metadata.dto.ts deleted file mode 100644 index 2e8ea6020..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-metadata.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; -import { ModelMetadata } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ModelMetadataDto implements ModelMetadata { - @ApiProperty({ description: 'The author of the model.' }) - @IsString() - author: string; - - @ApiProperty({ description: "The model's tags." }) - @IsArray() - tags: string[]; - - @ApiProperty({ description: "The model's size." }) - @IsNumber() - size: number; - - @ApiProperty({ description: "The model's cover." 
}) - @IsString() - @IsOptional() - cover?: string | undefined; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-runtime-params.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-runtime-params.dto.ts deleted file mode 100644 index 3ee6db44c..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-runtime-params.dto.ts +++ /dev/null @@ -1,86 +0,0 @@ -import { - IsArray, - IsBoolean, - IsNumber, - IsOptional, - IsString, -} from 'class-validator'; -import { ModelRuntimeParams } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ModelRuntimeParamsDto implements ModelRuntimeParams { - @ApiProperty({ - description: "Influences the randomness of the model's output.", - }) - @IsOptional() - @IsNumber() - temperature?: number; - - @ApiProperty({ - description: - 'Sets the maximum number of pieces (like words or characters) the model will produce at one time.', - }) - @IsOptional() - @IsNumber() - token_limit?: number; - - @ApiProperty({ - description: - "Limits the model's choices when it's deciding what to write next.", - }) - @IsOptional() - @IsNumber() - top_k?: number; - - @ApiProperty({ - description: 'Sets probability threshold for more relevant outputs.', - }) - @IsOptional() - @IsNumber() - top_p?: number; - - @ApiProperty({ - description: - 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', - }) - @IsOptional() - @IsBoolean() - stream?: boolean; - - @ApiProperty({ - description: - 'Sets the upper limit on the number of tokens the model can generate in a single output.', - }) - @IsOptional() - @IsNumber() - max_tokens?: number; - - @ApiProperty({ - description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', - }) - @IsOptional() - @IsArray() - stop?: string[]; - - @ApiProperty({ - description: - 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', - }) - @IsOptional() - @IsNumber() - frequency_penalty?: number; - - @ApiProperty({ - description: - 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', - }) - @IsOptional() - @IsNumber() - presence_penalty?: number; - - @ApiProperty({ description: 'The engine used to run the model.' }) - @IsOptional() - @IsString() - engine?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-setting-params.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-setting-params.dto.ts deleted file mode 100644 index 146934d0f..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-setting-params.dto.ts +++ /dev/null @@ -1,108 +0,0 @@ -import { IsBoolean, IsNumber, IsOptional, IsString } from 'class-validator'; -import { ModelSettingParams } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ModelSettingParamsDto implements ModelSettingParams { - @ApiProperty({ - description: - 'Sets the maximum input the model can use to generate a response, it varies with the model used.', - }) - @IsOptional() - @IsNumber() - ctx_len?: number; - - @ApiProperty({ description: 'Determines GPU layer usage.' 
}) - @IsOptional() - @IsNumber() - ngl?: number; - - @ApiProperty({ - description: - 'Enables embedding utilization for tasks like document-enhanced chat in RAG-based applications.', - }) - @IsOptional() - @IsBoolean() - embedding?: boolean; - - @ApiProperty({ description: 'Number of parallel processing units to use.' }) - @IsOptional() - @IsNumber() - n_parallel?: number; - - @ApiProperty({ - description: - 'Determines CPU inference threads, limited by hardware and OS. ', - }) - @IsOptional() - @IsNumber() - cpu_threads?: number; - - @ApiProperty({ - description: - "A predefined text or framework that guides the AI model's response generation.", - }) - @IsOptional() - @IsString() - prompt_template?: string; - - @ApiProperty({ - description: - 'Specific prompt used by the system for generating model outputs.', - }) - @IsOptional() - @IsString() - system_prompt?: string; - - @ApiProperty({ - description: - 'The prompt fed into the AI, typically to guide or specify the nature of the content it should generate.', - }) - @IsOptional() - @IsString() - ai_prompt?: string; - - @ApiProperty({ - description: - 'Customizable prompt input by the user to direct the model’s output generation.', - }) - @IsOptional() - @IsString() - user_prompt?: string; - - @ApiProperty({ description: 'File path to a specific llama model.' }) - @IsOptional() - @IsString() - llama_model_path?: string; - - @ApiProperty({ - description: - 'The mmproj is a projection matrix that is used to project the embeddings from CLIP into tokens usable by llama/mistral.', - }) - @IsOptional() - @IsString() - mmproj?: string; - - @ApiProperty({ - description: - 'Controls continuous batching, enhancing throughput for LLM inference.', - }) - @IsOptional() - @IsBoolean() - cont_batching?: boolean; - - @ApiProperty({ - description: - 'Specifies if a vision-based model (for image processing) should be used.', - }) - @IsOptional() - @IsBoolean() - vision_model?: boolean; - - @ApiProperty({ - description: - 'Specifies if a text-based model is to be utilized, for tasks like text generation or analysis.', - }) - @IsOptional() - @IsBoolean() - text_model?: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts index 64a9e7683..97ab21617 100644 --- a/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts @@ -1,92 +1,108 @@ +import { Model } from '@/domain/models/model.interface'; import { ApiProperty } from '@nestjs/swagger'; +import { IsArray, IsBoolean, IsNumber, IsOptional } from 'class-validator'; -export class ModelDto { +export class ModelDto implements Partial { + // Prompt Settings @ApiProperty({ - example: - 'https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf', - description: 'URL to the source of the model.', + example: 'system\n{system_message}\nuser\n{prompt}\nassistant', + description: + "A predefined text or framework that guides the AI model's response generation.", }) - source_url: string; + @IsOptional() + prompt_template?: string; @ApiProperty({ - example: 'trinity-v1.2-7b', + type: [String], + example: [], description: - 'Unique identifier used in chat-completions model_name, matches folder name.', + 'Defines specific tokens or phrases that signal the model to stop producing further output.', }) - id: string; + @IsArray() + @IsOptional() + stop?: string[]; - @ApiProperty({ 
example: 'model' }) - object: string; + // Results Preferences @ApiProperty({ - example: 'Trinity-v1.2 7B Q4', - description: 'Name of the model.', + example: 4096, + description: + 'Sets the upper limit on the number of tokens the model can generate in a single output.', }) - name: string; + @IsOptional() + @IsNumber() + max_tokens?: number; @ApiProperty({ - default: '1.0', - description: 'The version number of the model.', + example: 0.7, + description: "Influences the randomness of the model's output.", }) - version: string; + @IsOptional() + @IsNumber() + temperature?: number; @ApiProperty({ - example: - 'Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.', - description: 'Description of the model.', + example: 0.95, + description: 'Sets probability threshold for more relevant outputs', }) - description: string; + @IsOptional() + @IsNumber() + top_p?: number; @ApiProperty({ - example: 'gguf', - description: 'State format of the model, distinct from the engine.', + example: true, + description: + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', }) - format: string; - - @ApiProperty({ description: 'Context length.', example: 4096 }) - ctx_len: number; + @IsOptional() + @IsBoolean() + stream?: boolean; @ApiProperty({ - example: 'system\n{system_message}\nuser\n{prompt}\nassistant', + example: 0, + description: + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', }) - prompt_template: string; - - @ApiProperty({ example: 0.7 }) - temperature: number; - - @ApiProperty({ example: 0.95 }) - top_p: number; - - @ApiProperty({ example: true }) - stream: boolean; - - @ApiProperty({ example: 4096 }) - max_tokens: number; - - @ApiProperty({ type: [String], example: [] }) - stop: string[]; + @IsOptional() + @IsNumber() + frequency_penalty?: number; - @ApiProperty({ example: 0 }) - frequency_penalty: number; - - @ApiProperty({ example: 0 }) - presence_penalty: number; - - @ApiProperty({ example: 'Jan' }) - author: string; + @ApiProperty({ + example: 0, + description: + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + }) + @IsOptional() + @IsNumber() + presence_penalty?: number; - @ApiProperty({ type: [String], example: ['7B', 'Merged', 'Featured'] }) - tags: string[]; + // Engine Settings + @ApiProperty({ description: 'Determines GPU layer usage.', example: 4096 }) + @IsOptional() + @IsNumber() + ngl?: number; - @ApiProperty({ example: 4370000000 }) - size: number; + @ApiProperty({ + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + example: 4096, + }) + @IsOptional() + @IsNumber() + ctx_len?: number; @ApiProperty({ - example: - 'https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png', + description: + 'Determines CPU inference threads, limited by hardware and OS. 
', }) - cover: string; + @IsOptional() + @IsNumber() + cpu_threads?: number; - @ApiProperty({ example: 'cortex' }) - engine: string; + @ApiProperty({ + example: 'cortex.llamacpp', + description: 'The engine to use.', + }) + @IsOptional() + engine?: string; } diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts index 659d0296e..2a6e8fbaf 100644 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts +++ b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts @@ -1,9 +1,9 @@ import { IsArray, IsOptional, IsString, ValidateNested } from 'class-validator'; import { ThreadAssistantInfo } from '@/domain/models/thread.interface'; -import { CreateThreadModelInfoDto } from './create-thread-model-info.dto'; import { AssistantToolDto } from '@/infrastructure/dtos/assistants/assistant-tool.dto'; import { Type } from 'class-transformer'; import { ApiProperty } from '@nestjs/swagger'; +import { CreateModelDto } from '../models/create-model.dto'; export class CreateThreadAssistantDto implements ThreadAssistantInfo { @ApiProperty({ description: 'The unique identifier of the assistant.' }) @@ -16,7 +16,7 @@ export class CreateThreadAssistantDto implements ThreadAssistantInfo { @ApiProperty({ description: "The model's unique identifier and settings." }) @ValidateNested() - model: CreateThreadModelInfoDto; + model: CreateModelDto; @ApiProperty({ description: "The assistant's specific instructions." }) @IsOptional() diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-model-info.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-model-info.dto.ts deleted file mode 100644 index 9c90085cd..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-model-info.dto.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { IsOptional, IsString, ValidateNested } from 'class-validator'; -import { ModelInfo } from '@/domain/models/model.interface'; -import { ModelRuntimeParamsDto } from '@/infrastructure/dtos/models/model-runtime-params.dto'; -import { ModelSettingParamsDto } from '@/infrastructure/dtos/models/model-setting-params.dto'; -import { ApiProperty } from '@nestjs/swagger'; - -export class CreateThreadModelInfoDto implements ModelInfo { - @ApiProperty({ description: 'The unique identifier of the thread.' }) - @IsString() - id: string; - - @ApiProperty({ description: 'The settings of the thread.' }) - @ValidateNested() - settings: ModelSettingParamsDto; - - @ApiProperty({ description: 'The parameters of the thread.' 
}) - @ValidateNested() - parameters: ModelRuntimeParamsDto; - - @ApiProperty({ - description: 'The engine used in the thread to operate the model.', - }) - @IsOptional() - @IsString() - engine?: string; -} diff --git a/cortex-js/src/infrastructure/entities/model.entity.ts b/cortex-js/src/infrastructure/entities/model.entity.ts index 878ad192f..052eb7d22 100644 --- a/cortex-js/src/infrastructure/entities/model.entity.ts +++ b/cortex-js/src/infrastructure/entities/model.entity.ts @@ -1,47 +1,59 @@ -import { - Model, - ModelArtifact, - ModelFormat, - ModelMetadata, - ModelRuntimeParams, - ModelSettingParams, -} from '@/domain/models/model.interface'; +import { Model, ModelArtifact } from '@/domain/models/model.interface'; import { Column, Entity, PrimaryColumn } from 'typeorm'; @Entity('models') export class ModelEntity implements Model { + // Cortex Meta @PrimaryColumn() - id: string; + model: string; @Column() - object: string; + name: string; @Column() version: string; + @Column({ type: 'simple-json' }) + files: string[] | ModelArtifact; + + // Model Input / Output Syntax @Column() - format: ModelFormat; + prompt_template: string; @Column({ type: 'simple-json' }) - sources: ModelArtifact[]; + stop: string[]; @Column() - name: string; + max_tokens: number; + // Results Preferences @Column() - created: number; + top_p: number; @Column() - description: string; + temperature: number; - @Column({ type: 'simple-json' }) - settings: ModelSettingParams; + @Column() + frequency_penalty: number; - @Column({ type: 'simple-json' }) - parameters: ModelRuntimeParams; + @Column() + presence_penalty: number; - @Column({ type: 'simple-json' }) - metadata: ModelMetadata; + @Column() + stream: boolean; + + // Engine Settings + @Column() + ctx_len: number; + + @Column() + ngl: number; + + @Column() + n_parallel: number; + + @Column() + cpu_threads: number; @Column() engine: string; diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts index d13eca9fd..33aee645e 100644 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts @@ -12,7 +12,7 @@ import { FileManagerService } from '@/file-manager/file-manager.service'; @Injectable() export default class CortexProvider extends OAIEngineExtension { - provider: string = 'cortex'; + provider: string = 'cortex.llamacpp'; apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`; private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`; @@ -25,47 +25,46 @@ export default class CortexProvider extends OAIEngineExtension { super(httpService); } - private async getModelDirectory(): Promise { - const dataFolderPath = await this.fileManagerService.getDataFolderPath(); - return join(dataFolderPath, 'models'); - } - override async loadModel( model: Model, settings?: ModelSettingParams, ): Promise { - const modelsContainerDir = await this.getModelDirectory(); - - const modelFolderFullPath = join( - modelsContainerDir, - normalizeModelId(model.id), - ); - const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { - return file.endsWith('.gguf'); - }); - - if (ggufFiles.length === 0) { - throw new Error('Model binary not found'); - } + const modelsContainerDir = await this.fileManagerService.getModelsPath(); + + let llama_model_path = settings?.llama_model_path; + if (!llama_model_path) { + const 
modelFolderFullPath = join( + modelsContainerDir, + normalizeModelId(model.model), + ); + const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { + return file.endsWith('.gguf'); + }); + + if (ggufFiles.length === 0) { + throw new Error('Model binary not found'); + } - const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); + const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); + llama_model_path = modelBinaryLocalPath; + } const cpuThreadCount = 1; // TODO: Math.max(1, nitroResourceProbe.numCpuPhysicalCore); const modelSettings = { // This is critical and requires real CPU physical core count (or performance core) - model: model.id, cpu_threads: cpuThreadCount, - ...model.settings, + ...model, ...settings, - llama_model_path: modelBinaryLocalPath, - ...(model.settings.mmproj && { - mmproj: join(modelFolderFullPath, model.settings.mmproj), - }), + llama_model_path, + ...('mmproj' in model.files && + model.files.mmproj && { + mmproj: settings?.mmproj, + }), }; // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (model.settings.prompt_template) { - const promptTemplate = model.settings.prompt_template; + if (model.prompt_template) { + const promptTemplate = model.prompt_template; const prompt = this.promptTemplateConverter(promptTemplate); if (prompt?.error) { throw new Error(prompt.error); diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts index e1cbfb289..89456cb4b 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts @@ -3,9 +3,10 @@ import { ExtensionRepositoryImpl } from './extension.repository'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module'; import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/file-manager/file-manager.module'; @Module({ - imports: [CortexProviderModule, HttpModule], + imports: [CortexProviderModule, HttpModule, FileManagerModule], providers: [ { provide: ExtensionRepository, diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts index a1f2c69f0..3acbdf789 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts @@ -5,6 +5,7 @@ import { readdir, lstat, access } from 'fs/promises'; import { join } from 'path'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; import { appPath } from '@/infrastructure/commanders/utils/app-path'; +import { FileManagerService } from '@/file-manager/file-manager.service'; @Injectable() export class ExtensionRepositoryImpl implements ExtensionRepository { @@ -14,6 +15,7 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { constructor( @Inject('CORTEX_PROVIDER') private readonly cortexProvider: EngineExtension, + private readonly fileService: FileManagerService, ) { this.loadCoreExtensions(); this.loadExternalExtensions(); @@ -36,14 +38,15 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { return Promise.resolve(); } - loadCoreExtensions(): void { + private loadCoreExtensions(): void { const extensionsPath = 
diff --git a/cortex-js/src/infrastructure/repositories/model/model.module.ts b/cortex-js/src/infrastructure/repositories/model/model.module.ts
new file mode 100644
index 000000000..be257d804
--- /dev/null
+++ b/cortex-js/src/infrastructure/repositories/model/model.module.ts
@@ -0,0 +1,18 @@
+import { Module } from '@nestjs/common';
+import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module';
+import { HttpModule } from '@nestjs/axios';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { ModelRepositoryImpl } from './model.repository';
+import { FileManagerModule } from '@/file-manager/file-manager.module';
+
+@Module({
+  imports: [CortexProviderModule, HttpModule, FileManagerModule],
+  providers: [
+    {
+      provide: ModelRepository,
+      useClass: ModelRepositoryImpl,
+    },
+  ],
+  exports: [ModelRepository],
+})
+export class ModelRepositoryModule {}
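Because `ModelRepository` is an abstract class, it can serve directly as the injection token in the `useClass` binding above, so consumers depend only on the domain abstraction. A short usage sketch (`SomeUsecases` is hypothetical):

import { Injectable } from '@nestjs/common';
import { ModelRepository } from '@/domain/repositories/model.interface';

@Injectable()
export class SomeUsecases {
  // Nest resolves this to ModelRepositoryImpl via the provider mapping above
  constructor(private readonly modelRepository: ModelRepository) {}

  findModel(id: string) {
    return this.modelRepository.findOne(id);
  }
}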
diff --git a/cortex-js/src/infrastructure/repositories/model/model.repository.ts b/cortex-js/src/infrastructure/repositories/model/model.repository.ts
new file mode 100644
index 000000000..6401c1ec7
--- /dev/null
+++ b/cortex-js/src/infrastructure/repositories/model/model.repository.ts
@@ -0,0 +1,150 @@
+import { Injectable } from '@nestjs/common';
+import { join, extname, basename } from 'path';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { Model } from '@/domain/models/model.interface';
+import { FileManagerService } from '@/file-manager/file-manager.service';
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  readdirSync,
+  rmSync,
+  writeFileSync,
+} from 'fs';
+import { load, dump } from 'js-yaml';
+import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id';
+
+@Injectable()
+export class ModelRepositoryImpl implements ModelRepository {
+  // Map of model id to model object, populated from the models folder.
+  models = new Map<string, Model>([]);
+  // Map between model ids and their yaml file names. E.g. llama3:7b -> llama3-7b.yaml
+  fileModel = new Map<string, string>([]);
+  // Check whether the models have been loaded or not.
+  loaded = false;
+
+  constructor(private readonly fileService: FileManagerService) {
+    this.loadModels();
+  }
+
+  /**
+   * Create a new model
+   * This would persist the model yaml file to the models folder
+   * @param object
+   * @returns the created model
+   */
+  async create(object: Model): Promise<Model> {
+    const modelsFolderPath = join(
+      await this.fileService.getDataFolderPath(),
+      'models',
+    );
+    const modelYaml = dump(object);
+    if (!existsSync(modelsFolderPath)) mkdirSync(modelsFolderPath);
+    const modelsPath =
+      process.env.EXTENSIONS_PATH ?? (await this.fileService.getModelsPath());
+    writeFileSync(
+      join(modelsPath, `${normalizeModelId(object.model)}.yaml`),
+      modelYaml,
+    );
+
+    this.models.set(object.model ?? '', object);
+    return Promise.resolve(object);
+  }
+
+  /**
+   * Find all models
+   * This would load all the models from the models folder
+   * @returns all models
+   */
+  findAll(): Promise<Model[]> {
+    return this.loadModels();
+  }
+  /**
+   * Find one model by id
+   * @param id model id
+   * @returns the model
+   */
+  findOne(id: string): Promise<Model | null> {
+    return this.loadModels().then(() => this.models.get(id) ?? null);
+  }
+
+  /**
+   * Update a model
+   * This would update the model yaml file in the models folder
+   * @param id model id
+   * @param object model object
+   */
+  async update(id: string, object: Partial<Model>): Promise<void> {
+    const originalModel = await this.findOne(id);
+    if (!originalModel) throw new Error('Model not found');
+
+    const updatedModel = {
+      ...originalModel,
+      ...object,
+    } satisfies Model;
+
+    const modelYaml = dump(updatedModel);
+    const modelsPath =
+      process.env.EXTENSIONS_PATH ?? (await this.fileService.getModelsPath());
+
+    writeFileSync(
+      join(
+        modelsPath,
+        this.fileModel.get(id) ?? `${normalizeModelId(id)}.yaml`,
+      ),
+      modelYaml,
+    );
+
+    this.models.set(id ?? '', updatedModel);
+  }
+
+  /**
+   * Remove a model
+   * This would remove the model yaml file from the models folder
+   * @param id model id
+   */
+  async remove(id: string): Promise<void> {
+    this.models.delete(id);
+    const yamlFilePath = join(
+      await this.fileService.getModelsPath(),
+      this.fileModel.get(id) ?? id,
+    );
+    if (existsSync(yamlFilePath)) rmSync(yamlFilePath);
+    return Promise.resolve();
+  }
+
+  /**
+   * Load all models
+   * This would load all the models from the models folder
+   * @returns the list of models
+   */
+  private async loadModels(): Promise<Model[]> {
+    if (this.loaded) return Array.from(this.models.values());
+    const modelsPath =
+      process.env.EXTENSIONS_PATH ?? (await this.fileService.getModelsPath());
+
+    if (!existsSync(modelsPath)) return [];
+
+    const modelFiles = readdirSync(modelsPath)
+      .filter(
+        (file) =>
+          extname(file).toLowerCase() === '.yaml' ||
+          extname(file).toLowerCase() === '.yml',
+      )
+      .map((file) => join(modelsPath, file));
+
+    modelFiles.forEach(async (modelFile) => {
+      const model = readFileSync(modelFile, 'utf8');
+      const yamlObject = load(model) as Model;
+      const fileName = basename(modelFile);
+
+      if (yamlObject) {
+        this.fileModel.set(yamlObject.model, fileName);
+        this.models.set(yamlObject.model, yamlObject);
+      }
+    });
+    this.loaded = true;
+    return Array.from(this.models.values());
+  }
+}
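To make the yaml persistence concrete: `create` dumps the Model object to `<models>/<normalized-id>.yaml` and `loadModels` reads it back at startup. A sketch of that round trip, not part of the diff, using an invented model (the URL is a placeholder):

import { dump, load } from 'js-yaml';
import { Model } from '@/domain/models/model.interface';

const model: Partial<Model> = {
  model: 'llama3:8b',
  files: ['https://example.com/llama3-8b.gguf'], // placeholder URL
  ctx_len: 4096,
  ngl: 100,
  engine: 'cortex.llamacpp',
};

// What create() writes to llama3-8b.yaml; per the fileModel comment above,
// normalizeModelId maps llama3:8b to the llama3-8b file name.
const modelYaml = dump(model);
// What loadModels() reconstructs from that file at startup
const restored = load(modelYaml) as Model;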
diff --git a/cortex-js/src/usecases/chat/chat.module.ts b/cortex-js/src/usecases/chat/chat.module.ts
index e69b10b73..852a13511 100644
--- a/cortex-js/src/usecases/chat/chat.module.ts
+++ b/cortex-js/src/usecases/chat/chat.module.ts
@@ -3,9 +3,10 @@ import { ChatController } from '@/infrastructure/controllers/chat.controller';
 import { ChatUsecases } from './chat.usecases';
 import { DatabaseModule } from '@/infrastructure/database/database.module';
 import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module';
+import { ModelRepositoryModule } from '@/infrastructure/repositories/model/model.module';

 @Module({
-  imports: [DatabaseModule, ExtensionModule],
+  imports: [DatabaseModule, ExtensionModule, ModelRepositoryModule],
   controllers: [ChatController],
   providers: [ChatUsecases],
   exports: [ChatUsecases],
diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts
index 61b0c0296..78b9cca26 100644
--- a/cortex-js/src/usecases/chat/chat.usecases.ts
+++ b/cortex-js/src/usecases/chat/chat.usecases.ts
@@ -1,16 +1,14 @@
-import { Inject, Injectable } from '@nestjs/common';
+import { Injectable } from '@nestjs/common';
 import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto';
-import { ExtensionRepository } from '@/domain/repositories/extension.interface';
-import { Repository } from 'typeorm';
-import { ModelEntity } from '@/infrastructure/entities/model.entity';
 import { EngineExtension } from '@/domain/abstracts/engine.abstract';
 import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { ExtensionRepository } from '@/domain/repositories/extension.interface';

 @Injectable()
 export class ChatUsecases {
   constructor(
-    @Inject('MODEL_REPOSITORY')
-    private readonly modelRepository: Repository<ModelEntity>,
+    private readonly modelRepository: ModelRepository,
     private readonly extensionRepository: ExtensionRepository,
   ) {}

@@ -20,9 +18,7 @@ export class ChatUsecases {
   ): Promise {
     const { model: modelId } = createChatDto;
     const extensions = (await this.extensionRepository.findAll()) ?? [];
-    const model = await this.modelRepository.findOne({
-      where: { id: modelId },
-    });
+    const model = await this.modelRepository.findOne(modelId);

     if (!model) {
       throw new ModelNotFoundException(modelId);
diff --git a/cortex-js/src/usecases/models/models.module.ts b/cortex-js/src/usecases/models/models.module.ts
index 2094e98c1..3d10b9868 100644
--- a/cortex-js/src/usecases/models/models.module.ts
+++ b/cortex-js/src/usecases/models/models.module.ts
@@ -6,6 +6,7 @@ import { CortexModule } from '@/usecases/cortex/cortex.module';
 import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module';
 import { HttpModule } from '@nestjs/axios';
 import { FileManagerModule } from '@/file-manager/file-manager.module';
+import { ModelRepositoryModule } from '@/infrastructure/repositories/model/model.module';

 @Module({
   imports: [
@@ -14,6 +15,7 @@ import { FileManagerModule } from '@/file-manager/file-manager.module';
     ExtensionModule,
     HttpModule,
     FileManagerModule,
+    ModelRepositoryModule,
   ],
   controllers: [ModelsController],
   providers: [ModelsUsecases],
diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts
index 980b0569e..925101745 100644
--- a/cortex-js/src/usecases/models/models.usecases.ts
+++ b/cortex-js/src/usecases/models/models.usecases.ts
@@ -1,14 +1,7 @@
 import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto';
 import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';
-import { ModelEntity } from '@/infrastructure/entities/model.entity';
-import { BadRequestException, Inject, Injectable } from '@nestjs/common';
-import { Repository } from 'typeorm';
-import {
-  Model,
-  ModelFormat,
-  ModelRuntimeParams,
-  ModelSettingParams,
-} from '@/domain/models/model.interface';
+import { BadRequestException, Injectable } from '@nestjs/common';
+import { Model, ModelSettingParams } from '@/domain/models/model.interface';
 import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
 import { join, basename } from 'path';
 import {
@@ -22,17 +15,18 @@ import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-s
 import { ExtensionRepository } from '@/domain/repositories/extension.interface';
 import { EngineExtension } from '@/domain/abstracts/engine.abstract';
 import { HttpService } from '@nestjs/axios';
-import { ModelSettingParamsDto } from '@/infrastructure/dtos/models/model-setting-params.dto';
 import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id';
 import { firstValueFrom } from 'rxjs';
 import { FileManagerService } from '@/file-manager/file-manager.service';
 import { AxiosError } from 'axios';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { ModelDto } from '@/infrastructure/dtos/models/model-successfully-created.dto';
+import { ModelParameterParser } from '@/infrastructure/commanders/utils/model-parameter.parser';

 @Injectable()
 export class ModelsUsecases {
   constructor(
-    @Inject('MODEL_REPOSITORY')
-    private readonly modelRepository: Repository<ModelEntity>,
+    private readonly modelRepository: ModelRepository,
     private readonly extensionRepository: ExtensionRepository,
     private readonly fileManagerService: FileManagerService,
     private readonly httpService: HttpService,
@@ -41,23 +35,17 @@ export class ModelsUsecases {
   async create(createModelDto: CreateModelDto) {
     const model: Model = {
       ...createModelDto,
-      object: 'model',
-      created: Date.now(),
     };
-    await this.modelRepository.insert(model);
+    await this.modelRepository.create(model);
   }

   async findAll(): Promise<Model[]> {
-    return this.modelRepository.find();
+    return this.modelRepository.findAll();
   }

-  async findOne(id: string) {
-    return this.modelRepository.findOne({
-      where: {
-        id,
-      },
-    });
+  async findOne(model: string) {
+    return this.modelRepository.findOne(model);
   }

   async getModelOrThrow(id: string): Promise<Model> {
@@ -72,45 +60,8 @@
     return this.modelRepository.update(id, updateModelDto);
   }

-  async updateModelSettingParams(
-    id: string,
-    settingParams: ModelSettingParams,
-  ): Promise<ModelSettingParams> {
-    const model = await this.getModelOrThrow(id);
-    const currentSettingParams = model.settings;
-    const updateDto: UpdateModelDto = {
-      settings: {
-        ...currentSettingParams,
-        ...settingParams,
-      },
-    };
-    await this.update(id, updateDto);
-    return updateDto.settings ?? {};
-  }
-
-  async updateModelRuntimeParams(
-    id: string,
-    runtimeParams: ModelRuntimeParams,
-  ): Promise<ModelRuntimeParams> {
-    const model = await this.getModelOrThrow(id);
-    const currentRuntimeParams = model.parameters;
-    const updateDto: UpdateModelDto = {
-      parameters: {
-        ...currentRuntimeParams,
-        ...runtimeParams,
-      },
-    };
-    await this.update(id, updateDto);
-    return updateDto.parameters ?? {};
-  }
-
-  private async getModelDirectory(): Promise<string> {
-    const dataFolderPath = await this.fileManagerService.getDataFolderPath();
-    return join(dataFolderPath, 'models');
-  }
-
   async remove(id: string) {
-    const modelsContainerDir = await this.getModelDirectory();
+    const modelsContainerDir = await this.fileManagerService.getModelsPath();
     if (!existsSync(modelsContainerDir)) {
       return;
     }
@@ -118,7 +69,7 @@ export class ModelsUsecases {
     const modelFolder = join(modelsContainerDir, normalizeModelId(id));

     return this.modelRepository
-      .delete(id)
+      .remove(id)
       .then(() => rmdirSync(modelFolder, { recursive: true }))
      .then(() => {
         return {
@@ -130,7 +81,7 @@

   async startModel(
     modelId: string,
-    settings?: ModelSettingParamsDto,
+    settings?: ModelDto,
   ): Promise<StartModelSuccessDto> {
     const model = await this.getModelOrThrow(modelId);
     const extensions = (await this.extensionRepository.findAll()) ?? [];
@@ -145,23 +96,34 @@
       };
     }

+    const parser = new ModelParameterParser();
+    const loadModelSettings: ModelSettingParams = {
+      // Default settings
+      ctx_len: 4096,
+      ngl: 100,
+      ...(Array.isArray(model?.files) &&
+        !('llama_model_path' in model) && {
+          llama_model_path: (model.files as string[])[0],
+        }),
+      engine: 'cortex.llamacpp',
+      // User / Model settings
+      ...parser.parseModelEngineSettings(model),
+      ...parser.parseModelEngineSettings(settings ?? {}),
+    };
+
     return engine
-      .loadModel(model, settings)
+      .loadModel(model, loadModelSettings)
       .then(() => ({
         message: 'Model loaded successfully',
         modelId,
       }))
-      .catch((e) =>
-        e.code === AxiosError.ERR_BAD_REQUEST
-          ? {
-              message: 'Model already loaded',
-              modelId,
-            }
-          : {
-              message: 'Model failed to load',
-              modelId,
-            },
-      );
+      .catch((e) => ({
+        message:
+          e.code === AxiosError.ERR_BAD_REQUEST
+            ? 'Model already loaded'
+            : 'Model failed to load',
+        modelId,
+      }));
   }

   async stopModel(modelId: string): Promise<StartModelSuccessDto> {
@@ -193,23 +155,27 @@
   async downloadModel(modelId: string, callback?: (progress: number) => void) {
     const model = await this.getModelOrThrow(modelId);

-    if (model.format === ModelFormat.API) {
-      throw new BadRequestException('Cannot download remote model');
-    }
+    // TODO: We will support split gguf files in the future
+    // Leave it as is for now (first element of the array)
+    const downloadUrl = Array.isArray(model.files)
+      ? model.files[0]
+      : model.files.llama_model_path;

-    const downloadUrl = model.sources[0].url;
+    if (!downloadUrl) {
+      throw new BadRequestException('No model URL provided');
+    }

     if (!this.isValidUrl(downloadUrl)) {
       throw new BadRequestException(`Invalid download URL: ${downloadUrl}`);
     }

     const fileName = basename(downloadUrl);
-    const modelsContainerDir = await this.getModelDirectory();
+    const modelsContainerDir = await this.fileManagerService.getModelsPath();

     if (!existsSync(modelsContainerDir)) {
       mkdirSync(modelsContainerDir, { recursive: true });
     }

-    const modelFolder = join(modelsContainerDir, normalizeModelId(model.id));
+    const modelFolder = join(modelsContainerDir, normalizeModelId(model.model));
     await promises.mkdir(modelFolder, { recursive: true });
     const destination = join(modelFolder, fileName);
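A sketch of how `loadModelSettings` in `startModel` could resolve for a files-array model. `parseModelEngineSettings` is assumed here to keep only engine-level keys (ctx_len, ngl, cpu_threads, and the like) from the object it is given; its real implementation lives in model-parameter.parser.ts and is not shown in this diff, and the values below are invented:

// Hypothetical input model
const model = {
  model: 'tinyllama',
  files: ['/home/user/cortex/models/tinyllama/model.gguf'],
  ngl: 33, // model-level engine setting
};

const loadModelSettings = {
  ctx_len: 4096, // default
  ngl: 100, // default
  // files is a plain array and the model has no llama_model_path field,
  // so the first file becomes the binary path
  llama_model_path: model.files[0],
  engine: 'cortex.llamacpp',
  ...{ ngl: model.ngl }, // stand-in for parser.parseModelEngineSettings(model)
};
// loadModelSettings.ngl === 33: model settings override the defaults,
// and any caller-provided settings would override both.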