janhq · louis-menlo · Jun 5, 2024 · Jun 3, 2024 · Jun 5, 2024 · Jun 5, 2024
diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts
@@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module';
 import { ChatModule } from './usecases/chat/chat.module';
 import { AssistantsModule } from './usecases/assistants/assistants.module';
 import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
+import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module';
 import { CortexModule } from './usecases/cortex/cortex.module';
 import { ConfigModule } from '@nestjs/config';
 import { env } from 'node:process';
@@ -31,6 +32,7 @@ import { FileManagerModule } from './file-manager/file-manager.module';
     CortexModule,
     ExtensionModule,
     FileManagerModule,
+    ModelRepositoryModule,
   ],
   providers: [SeedService],
 })

diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts
@@ -1,21 +1,6 @@
-/**
- * Represents the information about a model.
- * @stored
- */
-export interface ModelInfo {
-  id: string;
-  settings: ModelSettingParams;
-  parameters: ModelRuntimeParams;
-  engine?: string;
-}
-
 export interface ModelArtifact {
-  url: string;
-}
-
-export enum ModelFormat {
-  GGUF = 'gguf',
-  API = 'api',
+  mmproj?: string;
+  llama_model_path?: string;
 }
 
 /**
@@ -24,64 +9,91 @@ export enum ModelFormat {
  */
 export interface Model {
   /**
-   * The type of the object.
-   * Default: "model"
+   * Model identifier.
    */
-  object: string;
+  model: string;
 
   /**
-   * The version of the model.
+   * GGUF metadata: general.name
    */
-  version: string;
+  name?: string;
 
   /**
-   * The format of the model.
+   * GGUF metadata: version
    */
-  format: ModelFormat;
+  version?: string;
 
   /**
    * The model download source. It can be an external url or a local filepath.
    */
-  sources: ModelArtifact[];
+  files: string[] | ModelArtifact;
+
+  /**
+   * GGUF metadata: tokenizer.chat_template
+   */
+  prompt_template?: string;
+
+  /**
+   * Defines specific tokens or phrases at which the model will stop generating further output.
+   */
+  stop?: string[];
+
+  /// Inferencing
+  /**
+   * Set probability threshold for more relevant outputs.
+   */
+  top_p?: number;
 
   /**
-   * The model identifier, which can be referenced in the API endpoints.
+   * Controls the randomness of the model’s output.
    */
-  id: string;
+  temperature?: number;
 
   /**
-   * Human-readable name that is used for UI.
+   * Adjusts the likelihood of the model repeating words or phrases in its output.
    */
-  name: string;
+  frequency_penalty?: number;
 
   /**
-   * The Unix timestamp (in seconds) for when the model was created
+   * Influences the generation of new and varied concepts in the model’s output.
    */
-  created: number;
+  presence_penalty?: number;
 
+  /// Engines
   /**
-   * Default: "A cool model from Huggingface"
+   * The context length for model operations varies; the maximum depends on the specific model used.
    */
-  description: string;
+  ctx_len?: number;
 
   /**
-   * The model settings.
+   * Enable real-time data processing for faster predictions.
    */
-  settings: ModelSettingParams;
+  stream?: boolean;
+
+  /**
+   * The maximum number of tokens the model will generate in a single response.
+   */
+  max_tokens?: number;
 
   /**
-   * The model runtime parameters.
+   * The number of layers to load onto the GPU for acceleration.
    */
-  parameters: ModelRuntimeParams;
+  ngl?: number;
 
   /**
-   * Metadata of the model.
+   * The number of parallel operations. Only set when continuous batching is enabled.
    */
-  metadata: ModelMetadata;
+  n_parallel?: number;
+
+  /**
+   * The number of CPU threads used for inference; the maximum is bounded by the hardware and OS.
+   */
+  cpu_threads?: number;
+
   /**
    * The model engine.
    */
-  engine: string;
+  engine?: string;
 }
 
 export interface ModelMetadata {
@@ -109,6 +121,8 @@ export interface ModelSettingParams {
   cont_batching?: boolean;
   vision_model?: boolean;
   text_model?: boolean;
+  engine?: string;
+  stop?: string[];
 }
 
 /**
@@ -133,8 +147,3 @@ export interface ModelRuntimeParams {
 export type ModelInitFailed = Model & {
   error: Error;
 };
-
-export interface NitroModelSettings extends ModelSettingParams {
-  llama_model_path: string;
-  cpu_threads: number;
-}
diff --git a/cortex-js/src/domain/models/thread.interface.ts b/cortex-js/src/domain/models/thread.interface.ts
@@ -1,5 +1,5 @@
 import { AssistantTool } from './assistant.interface';
-import { ModelInfo } from './model.interface';
+import { Model } from './model.interface';
 
 export interface Thread {
   /** Unique identifier for the thread, generated by default using the ULID method. **/
@@ -40,7 +40,7 @@ export interface ThreadMetadata {
 export interface ThreadAssistantInfo {
   assistant_id: string;
   assistant_name: string;
-  model: ModelInfo;
+  model: Partial<Model>;
   instructions?: string;
   tools?: AssistantTool[];
 }
diff --git a/cortex-js/src/domain/repositories/model.interface.ts b/cortex-js/src/domain/repositories/model.interface.ts
@@ -0,0 +1,4 @@
+import { Model } from '../models/model.interface';
+import { Repository } from './repository.interface';
+
+export abstract class ModelRepository extends Repository<Model> {}
diff --git a/cortex-js/src/file-manager/file-manager.service.ts b/cortex-js/src/file-manager/file-manager.service.ts
@@ -10,8 +10,13 @@ export class FileManagerService {
   private configFile = '.cortexrc';
   private cortexDirectoryName = 'cortex';
   private modelFolderName = 'models';
+  private extensionFolderName = 'extensions';
   private cortexCppFolderName = 'cortex-cpp';
 
+  /**
+   * Get cortex configs
+   * @returns the config object
+   */
   async getConfig(): Promise<Config> {
     const homeDir = os.homedir();
     const configPath = join(homeDir, this.configFile);
@@ -71,8 +76,33 @@ export class FileManagerService {
     };
   }
 
+  /**
+   * Get the app data folder path
+   * Usually it is located at the home directory > cortex
+   * @returns the path to the data folder
+   */
   async getDataFolderPath(): Promise<string> {
     const config = await this.getConfig();
     return config.dataFolderPath;
   }
+
+  /**
+   * Get the models data folder path
+   * Usually it is located at the home directory > cortex > models
+   * @returns the path to the models folder
+   */
+  async getModelsPath(): Promise<string> {
+    const dataFolderPath = await this.getDataFolderPath();
+    return join(dataFolderPath, this.modelFolderName);
+  }
+
+  /**
+   * Get the extensions data folder path
+   * Usually it is located at the home directory > cortex > extensions
+   * @returns the path to the extensions folder
+   */
+  async getExtensionsPath(): Promise<string> {
+    const dataFolderPath = await this.getDataFolderPath();
+    return join(dataFolderPath, this.extensionFolderName);
+  }
 }
diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts
@@ -14,7 +14,8 @@ export class ModelGetCommand extends CommandRunner {
       exit(1);
     }
 
-    const models = await this.modelsCliUsecases.getModel(input[0]);
-    console.log(models);
+    const model = await this.modelsCliUsecases.getModel(input[0]);
+    if (!model) console.error('Model not found');
+    else console.log(model);
   }
 }
diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts
@@ -15,10 +15,9 @@ export class ModelListCommand extends CommandRunner {
     option.format === 'table'
       ? console.table(
           models.map((e) => ({
-            id: e.id,
+            id: e.model,
             engine: e.engine,
-            format: e.format,
-            created: e.created,
+            version: e.version,
           })),
         )
       : console.log(models);

diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
@@ -2,7 +2,7 @@ import { CommandRunner, InquirerService, SubCommand } from 'nest-commander';
 import { exit } from 'node:process';
 import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
 import { RepoDesignation, listFiles } from '@huggingface/hub';
-import { basename } from 'node:path';
+import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
 
 @SubCommand({
   name: 'pull',
@@ -29,12 +29,16 @@ export class ModelPullCommand extends CommandRunner {
       ? undefined
       : await this.tryToGetBranches(input[0]);
 
-    if (!branches) {
-      await this.modelsCliUsecases.pullModel(input[0]);
-    } else {
-      // if there's metadata.yaml file, we assumed it's a JanHQ model
-      await this.handleJanHqModel(input[0], branches);
-    }
+    await this.modelsCliUsecases
+      .pullModel(
+        !branches ? input[0] : await this.handleJanHqModel(input[0], branches),
+      )
+      .catch((e: Error) => {
+        if (e instanceof ModelNotFoundException)
+          console.error('Model does not exist.');
+        else console.error(e);
+        exit(1);
+      });
 
     console.log('\nDownload complete!');
     exit(0);
@@ -83,10 +87,6 @@ export class ModelPullCommand extends CommandRunner {
   }
 
   private async handleJanHqModel(repoName: string, branches: string[]) {
-    const sanitizedRepoName = repoName.trim().startsWith(this.janHqModelPrefix)
-      ? repoName
-      : `${this.janHqModelPrefix}/${repoName}`;
-
     let selectedTag = branches[0];
 
     if (branches.length > 1) {
@@ -98,30 +98,7 @@ export class ModelPullCommand extends CommandRunner {
       console.error("Can't find model revision.");
       exit(1);
     }
-
-    const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
-    let ggufUrl: string | undefined = undefined;
-    let fileSize = 0;
-    for await (const fileInfo of listFiles({
-      repo: repo,
-      revision: revision,
-    })) {
-      if (fileInfo.path.endsWith('.gguf')) {
-        ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
-        fileSize = fileInfo.size;
-        break;
-      }
-    }
-
-    if (!ggufUrl) {
-      console.error("Can't find model file.");
-      exit(1);
-    }
-    console.log('Downloading', basename(ggufUrl));
-    await this.modelsCliUsecases.pullModelWithExactUrl(
-      `${sanitizedRepoName}/${revision}`,
-      ggufUrl,
-      fileSize,
-    );
+    // Return parsed model Id
+    return `${repoName}:${revision}`;
   }
 }
diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts
@@ -33,6 +33,20 @@ export class ModelStartCommand extends CommandRunner {
       }
     }
 
+    // Refuse to start unless the model exists and `files` is a non-empty
+    // array whose first entry is not an http(s) URL. The emptiness check is
+    // needed because RegExp.test(undefined) tests the string "undefined".
+    const existingModel = await this.modelsCliUsecases.getModel(modelId);
+    if (
+      !existingModel ||
+      !Array.isArray(existingModel.files) ||
+      !existingModel.files[0] ||
+      /^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0])
+    ) {
+      console.error('Model is not available. Please pull the model first.');
+      process.exit(1);
+    }
+
     await this.cortexUsecases
       .startCortex(options.attach)
       .then(() => this.modelsCliUsecases.startModel(modelId, options.preset))
@@ -41,15 +55,22 @@ export class ModelStartCommand extends CommandRunner {
   }
 
   modelInquiry = async () => {
-    const models = await this.modelsCliUsecases.listAllModels();
+    // Offer only models that the start check accepts: `files` must be a
+    // non-empty array whose first entry is not an http(s) URL.
+    const models = (await this.modelsCliUsecases.listAllModels()).filter(
+      (model) =>
+        Array.isArray(model.files) &&
+        !!model.files[0] &&
+        !/^(http|https):\/\/[^/]+\/.*/.test(model.files[0]),
+    );
     if (!models.length) throw 'No models found';
     const { model } = await this.inquirerService.inquirer.prompt({
       type: 'list',
       name: 'model',
       message: 'Select a model to start:',
       choices: models.map((e) => ({
         name: e.name,
-        value: e.id,
+        value: e.model,
       })),
     });
     return model;