Skip to content

Commit

Permalink
feat: refactor cortex API with new model.yaml structure
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan committed Jun 4, 2024
1 parent b879f66 commit 494a155
Show file tree
Hide file tree
Showing 30 changed files with 545 additions and 603 deletions.
2 changes: 2 additions & 0 deletions cortex-js/src/app.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module';
import { ChatModule } from './usecases/chat/chat.module';
import { AssistantsModule } from './usecases/assistants/assistants.module';
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module';
import { CortexModule } from './usecases/cortex/cortex.module';
import { ConfigModule } from '@nestjs/config';
import { env } from 'node:process';
Expand All @@ -31,6 +32,7 @@ import { FileManagerModule } from './file-manager/file-manager.module';
CortexModule,
ExtensionModule,
FileManagerModule,
ModelRepositoryModule,
],
providers: [SeedService],
})
Expand Down
92 changes: 52 additions & 40 deletions cortex-js/src/domain/models/model.interface.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,6 @@
/**
* Represents the information about a model.
* @stored
*/
export interface ModelInfo {
id: string;
settings: ModelSettingParams;
parameters: ModelRuntimeParams;
engine?: string;
}

export interface ModelArtifact {
url: string;
}

export enum ModelFormat {
GGUF = 'gguf',
API = 'api',
mmproj?: string;
llama_model_path?: string;
}

/**
Expand All @@ -24,64 +9,91 @@ export enum ModelFormat {
*/
export interface Model {
/**
* The type of the object.
* Default: "model"
* Model identifier.
*/
object: string;
model: string;

/**
* The version of the model.
* GGUF metadata: general.name
*/
version: string;
name?: string;

/**
* The format of the model.
* GGUF metadata: version
*/
format: ModelFormat;
version?: string;

/**
* The model download source. It can be an external url or a local filepath.
*/
sources: ModelArtifact[];
files: string[] | ModelArtifact;

/**
* GGUF metadata: tokenizer.chat_template
*/
prompt_template?: string;

/**
* Defines specific tokens or phrases at which the model will stop generating further output.
*/
end_token?: string[];

/// Inferencing
/**
* Set probability threshold for more relevant outputs.
*/
top_p?: number;

/**
* The model identifier, which can be referenced in the API endpoints.
* Controls the randomness of the model’s output.
*/
id: string;
temperature?: number;

/**
* Human-readable name that is used for UI.
* Adjusts the likelihood of the model repeating words or phrases in its output.
*/
name: string;
frequency_penalty?: number;

/**
* Influences the generation of new and varied concepts in the model’s output.
*/
presence_penalty?: number;

/// Engines
/**
* The Unix timestamp (in seconds) for when the model was created
* The context length for model operations varies; the maximum depends on the specific model used.
*/
created: number;
ctx_length?: number;

/**
* Default: "A cool model from Huggingface"
* Enable real-time data processing for faster predictions.
*/
description: string;
stream?: boolean;

/*
* The maximum number of tokens the model will generate in a single response.
*/
max_tokens?: number;

/**
* The model settings.
* The number of layers to load onto the GPU for acceleration.
*/
settings: ModelSettingParams;
ngl?: number;

/**
* The model runtime parameters.
* The number of parallel operations. Only set when enable continuous batching.
*/
parameters: ModelRuntimeParams;
n_parallel?: number;

/**
* Metadata of the model.
* Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)
*/
metadata: ModelMetadata;
cpu_threads?: number;

/**
* The model engine.
*/
engine: string;
engine?: string;
}

export interface ModelMetadata {
Expand Down
4 changes: 2 additions & 2 deletions cortex-js/src/domain/models/thread.interface.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { AssistantTool } from './assistant.interface';
import { ModelInfo } from './model.interface';
import { Model } from './model.interface';

export interface Thread {
/** Unique identifier for the thread, generated by default using the ULID method. **/
Expand Down Expand Up @@ -40,7 +40,7 @@ export interface ThreadMetadata {
export interface ThreadAssistantInfo {
assistant_id: string;
assistant_name: string;
model: ModelInfo;
model: Model;
instructions?: string;
tools?: AssistantTool[];
}
4 changes: 4 additions & 0 deletions cortex-js/src/domain/repositories/model.interface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { Model } from '../models/model.interface';
import { Repository } from './repository.interface';

/**
 * Persistence contract for {@link Model} entities.
 * Specializes the generic {@link Repository} to the `Model` domain type.
 * Declared as an abstract class (rather than an interface), presumably so it
 * can double as a runtime injection token for a concrete implementation —
 * TODO confirm against the module that provides it.
 */
export abstract class ModelRepository extends Repository<Model> {}
11 changes: 11 additions & 0 deletions cortex-js/src/file-manager/file-manager.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export class FileManagerService {
private configFile = '.cortexrc';
private cortexDirectoryName = 'cortex';
private modelFolderName = 'models';
private extensionFoldername = 'extensions';
private cortexCppFolderName = 'cortex-cpp';

async getConfig(): Promise<Config> {
Expand Down Expand Up @@ -75,4 +76,14 @@ export class FileManagerService {
const config = await this.getConfig();
return config.dataFolderPath;
}

/**
 * Resolves the directory used to store model files:
 * `<dataFolder>/models`, where the data folder comes from the
 * persisted configuration via `getDataFolderPath()`.
 */
async getModelsPath(): Promise<string> {
  return join(await this.getDataFolderPath(), this.modelFolderName);
}

/**
 * Resolves the directory used to store extensions:
 * `<dataFolder>/extensions`, where the data folder comes from the
 * persisted configuration via `getDataFolderPath()`.
 */
async getExtensionsPath(): Promise<string> {
  return join(await this.getDataFolderPath(), this.extensionFoldername);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,12 @@ export class ModelPullCommand extends CommandRunner {

const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
let ggufUrl: string | undefined = undefined;
let fileSize = 0;
for await (const fileInfo of listFiles({
repo: repo,
revision: revision,
})) {
if (fileInfo.path.endsWith('.gguf')) {
ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
fileSize = fileInfo.size;
break;
}
}
Expand All @@ -121,7 +119,6 @@ export class ModelPullCommand extends CommandRunner {
await this.modelsCliUsecases.pullModelWithExactUrl(
`${sanitizedRepoName}/${revision}`,
ggufUrl,
fileSize,
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export class ModelStartCommand extends CommandRunner {
message: 'Select a model to start:',
choices: models.map((e) => ({
name: e.name,
value: e.id,
value: e.model,
})),
});
return model;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import { CommandRunner, SubCommand, Option } from 'nest-commander';
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
import { exit } from 'node:process';
import { ModelParameterParser } from '../utils/model-parameter.parser';
import {
ModelRuntimeParams,
ModelSettingParams,
} from '@/domain/models/model.interface';
import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';

type UpdateOptions = {
model?: string;
Expand All @@ -31,42 +27,13 @@ export class ModelUpdateCommand extends CommandRunner {
exit(0);
}

const parser = new ModelParameterParser();
const settingParams: ModelSettingParams = {};
const runtimeParams: ModelRuntimeParams = {};
const toUpdate: UpdateModelDto = {};

options.forEach((option) => {
const [key, stringValue] = option.split('=');
if (parser.isModelSettingParam(key)) {
const value = parser.parse(key, stringValue);
// @ts-expect-error did the check so it's safe
settingParams[key] = value;
} else if (parser.isModelRuntimeParam(key)) {
const value = parser.parse(key, stringValue);
// @ts-expect-error did the check so it's safe
runtimeParams[key] = value;
}
Object.assign(toUpdate, { key, stringValue });
});

if (Object.keys(settingParams).length > 0) {
const updatedSettingParams =
await this.modelsCliUsecases.updateModelSettingParams(
modelId,
settingParams,
);
console.log(
'Updated setting params! New setting params:',
updatedSettingParams,
);
}

if (Object.keys(runtimeParams).length > 0) {
await this.modelsCliUsecases.updateModelRuntimeParams(
modelId,
runtimeParams,
);
console.log('Updated runtime params! New runtime params:', runtimeParams);
}
this.modelsCliUsecases.updateModel(modelId, toUpdate);
}

@Option({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ export class RunCommand extends CommandRunner {
message: 'Select a model to start:',
choices: models.map((e) => ({
name: e.name,
value: e.id,
value: e.model,
})),
});
return model;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import { Thread } from '@/domain/models/thread.interface';
import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto';
import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases';
import { CreateThreadAssistantDto } from '@/infrastructure/dtos/threads/create-thread-assistant.dto';
import { CreateThreadModelInfoDto } from '@/infrastructure/dtos/threads/create-thread-model-info.dto';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import stream from 'stream';
import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto';
Expand Down Expand Up @@ -194,16 +193,10 @@ export class ChatCliUsecases {
const assistant = await this.assistantUsecases.findOne('jan');
if (!assistant) throw new Error('No assistant available');

const createThreadModel: CreateThreadModelInfoDto = {
id: modelId,
settings: model.settings,
parameters: model.parameters,
};

const assistantDto: CreateThreadAssistantDto = {
assistant_id: assistant.id,
assistant_name: assistant.name,
model: createThreadModel,
model: model,
};

const createThreadDto: CreateThreadDto = {
Expand Down
Loading

0 comments on commit 494a155

Please sign in to comment.